AJMR-Python-Baird/NTC_DFM/EFDC_TempCompare_AJMR.py

767 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
@author: aforsythe
Copied from "P:/11934.201 Newtown Creek TPP Privileged and Confidential/05_Analyses/07 ADCP/NC_CurrentMeter_All_Phase1_all_data_2012_05_20/ADCP_Plot_v4.py"
_AJMR
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import geopandas as gp
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
import scipy as sp
import scipy.ndimage
#from astropy.convolution import Gaussian2DKernel
import contextily as ctx
import os
from shapely.geometry import Point
import pickle
# %% Read in data
dataPath = '//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/05_Analyses/07 ADCP/NC_CurrentMeter_All_Phase1_all_data_2012_05_20/'
df = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/NC_CurrentMeter_All_Phase1_all_data_2013_07_16/NC_CurrentMeter_All_Phase1_all_mobile_2013_05_02.xlsx',
sheet_name='mag_all_mobile')
# Convert to geodataframe
gdf = gp.GeoDataFrame(df, geometry=gp.points_from_xy(df.Longitude, df.Latitude, crs="EPSG:4326"))
#convert data to CRS of D3D
gdf.geometry = gdf.geometry.to_crs("EPSG:32118")
# Add new cols to geodataframe
gdf['Distance'] = np.zeros([len(gdf), 1])
gdf['DistanceSmth'] = np.zeros([len(gdf), 1])
gdf['dist'] = np.zeros([len(gdf), 1])
gdf['date'] = np.zeros([len(gdf), 1])
# Set Date
gdf['date'] = gdf['year'].astype(str) + '-' + gdf['mon'].astype(str).str.zfill(2) + '-' + gdf['day'].astype(
str).str.zfill(2)
# Column names to distances data for plotting
depths1 = gdf.columns[11:43] # ADCP sensor depth reading column names to list
depths = np.array([float(d.replace('m', '')) for d in depths1]) * -1
#loop through all transects
transectCount = 0
transects = gdf['transect'].unique()
kernel = Gaussian2DKernel(x_stddev=0.25)
for transect_id in range(1, 2): #transects:
# Select a given trasect
tMask = (gdf['transect']==transect_id)
# Remove rows without locations
tMask[pd.isna(gdf['Latitude'])] = False
# Find distance betwen points in m
gdf.loc[tMask, 'Distance'] = gdf[tMask].distance(gdf[tMask].shift())
# Many of the points are recorded as being at the same location...
# Where the distance is zero, set it to half of the following distance
# Find zeros and iloc after zeros
zeroDist = (gdf['Distance'] == 0) & tMask
zeroDistShift = zeroDist.shift()
zeroDistShift.iloc[0] = False
distShift = gdf['Distance'].shift(-1)
distShift.iloc[-1] = False
gdf.loc[tMask, 'DistanceSmth'] = gdf['Distance'][tMask]
# Set zeros to half of following
gdf.loc[zeroDist, 'DistanceSmth'] = distShift[zeroDist]/2
# Set following to half
gdf.loc[zeroDistShift, 'DistanceSmth'] = gdf['Distance']/2
# Set initial to zero
gdf.loc[gdf[tMask].index[0], 'DistanceSmth'] = 0
# If final location is duplicate, set to previous value
if gdf.loc[gdf[tMask].index[-1], 'Distance']==0:
gdf.loc[gdf[tMask].index[-1], 'DistanceSmth'] = gdf.loc[gdf[tMask].index[-2], 'DistanceSmth']
# Get cumulative sum of distances
gdf.loc[tMask, 'dist'] = gdf.loc[tMask, 'DistanceSmth'].cumsum()
# get velocity data all rows columns 11-43
V = gdf.values[tMask, 11:43].astype(float)
# #__________________________________________________________________________________________________________________________
# %% Plotting
if transectCount == 0:
fig, axes = plt.subplots(nrows=5, ncols=4, figsize=(16, 8))
fig.tight_layout(pad=2.5)
ax = axes.flat
colormap = 'jet'
vmin=0
vmax=0.5
VS = sp.ndimage.filters.gaussian_filter(V, [1, 1], mode='nearest')
# vDatEnd = (~np.isnan(V)).cumsum(1).argmax(1).astype(int) + 1
# VS = convolve(V, kernel)
# for i in range(0, len(VS)):
# VS[i, vDatEnd[i]:-1] = np.nan
pltDat = ax[transectCount].pcolormesh(gdf.loc[tMask, 'dist'], depths, np.transpose(VS),
shading='auto', vmin=vmin, vmax=vmax, cmap=colormap)
cbar = fig.colorbar(pltDat, ax=ax[transectCount],
shrink=0.95,aspect=5)
cbar.set_label('Magnitude [m/s]')
ax[transectCount].set_xlabel('Distance Along Transect [m]')
ax[transectCount].set_ylabel('Depth below WSL [m]')
stationStart = next((i for i, j in enumerate(tMask) if j), None)
ax[transectCount].set_title(gdf.loc[stationStart, 'station'])
ax[transectCount].set_ylim(-6, 0)
transectCount = transectCount + 1
plt.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/Transects221-241.png',
# bbox_inches='tight', dpi=300)
# %% Save Transects
gdf.loc[:, df.columns != 'geometry'].to_xarray().to_netcdf(
'C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/Transects.nc')
# %% Load in moored data
df_moored_data = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/NC_CurrentMeter_All_Phase1_all_data_2013_07_16/NC_CurrentMeter_All_Phase1_all_moored_2013_05_20.xlsx',
sheet_name='mag_all_moored')
# Shift col names to put second back in
colIN = list(df_moored_data.columns)
colOUT = colIN[0:6] + ['second'] + colIN[6:-1]
df_moored_data.columns = colOUT
### Moored current meter locations from here:
### file://srv-ott3/Projects/11934.201%20Newtown%20Creek%20TPP%20%E2%80%93%20Privileged%20and%20Confidential/03_Data/01_BkgrdReports/NC_DRAFT_DSR_Submittal_No_3_2013-07-03.pdf
### Table 3-1, PDF page 65
### Locations change between deployment 1-9 and 10/11
df_moored = pd.DataFrame(
{'depOrig': ['NC083CM', 'NC081CM', 'NC082CM', 'NC086CM', 'EK023CM'],
'depCurr': ['NC086CM', 'NC087CM', 'NC088CM', 'NC089CM', 'EK023CM'],
'Northing': [208519.95, 206083.24, 203835.10, 201381.55, 200827.33],
'Easting': [996198.97, 1000959.45, 1004387.42, 1005283.07, 1004644.90],
'bedElev': [-16, -17, -20, -18, -20]})
gdf_moored_loc = gp.GeoDataFrame(
df_moored, geometry=gp.points_from_xy(df_moored.Easting, df_moored.Northing), crs="EPSG:2263")
gdf_moored_loc.geometry = gdf_moored_loc.geometry.to_crs("EPSG:32118")
# Loop through Station IDs for deployments 1-9 and assign locations
# for d in range(1,10):
# depMask = df_moored_data['deployment'] == ('dep' + str(d))
# for stationIDX, station in enumerate(df_moored['depOrig']):
# stationMask = (df_moored_data['station'] == station) & depMask
#
# # Assign geography to station plus deployment
# df_moored_data.loc[stationMask, 'Northing'] = df_moored.loc[stationIDX, 'Northing']
# df_moored_data.loc[stationMask, 'Easting'] = df_moored.loc[stationIDX, 'Easting']
# Loop through Station IDs for deployments 10-11 and assign locations
for d in range(1,12):
depMask = df_moored_data['deployment'] == 'dep' + str(d)
for stationIDX, station in enumerate(df_moored['depCurr']):
stationMask = (df_moored_data['station'] == station) & depMask
# Assign geography to station plus deployment
df_moored_data.loc[stationMask, 'Northing'] = df_moored.loc[stationIDX, 'Northing']
df_moored_data.loc[stationMask, 'Easting'] = df_moored.loc[stationIDX, 'Easting']
# Create geodataframe and convert to USSP
gdf_moored = gp.GeoDataFrame(
df_moored_data, geometry=gp.points_from_xy(df_moored_data.Easting, df_moored_data.Northing), crs="EPSG:2263")
#convert data to CRS of D3D
gdf_moored.geometry = gdf_moored.geometry.to_crs("EPSG:32118")
gdf_moored['date'] = pd.to_datetime(gdf_moored['year'].astype(str) + '-' +
gdf_moored['month'].astype(str).str.zfill(2) + '-' +
gdf_moored['day'].astype(str).str.zfill(2) + ' ' +
gdf_moored['hour'].astype(str).str.zfill(2) + ':' +
gdf_moored['minute'].astype(str).str.zfill(2))
# %% ADCP1
gdf_moored.iloc[:, 1:-2].to_xarray().to_netcdf(
'C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/ADCP1.nc')
# %% Plot moored data
# Column names to distances data for plotting
depths1 = gdf_moored.columns[8:51] # ADCP sensor depth reading column names to list
depths_moored = np.array([float(d.replace('m', '')) for d in depths1])
colormap = 'jet'
vmin = 0
vmax = 0.2
fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(16, 8))
fig.tight_layout(pad=2)
# Loop through Station IDs for deployments 1-9 and assign locations
for d in range(1,12):
depMask = gdf_moored['deployment'] == ('dep' + str(d))
for stationIDX, station in enumerate(df_moored['depCurr']):
stationMask = (gdf_moored['station'] == station) & depMask
V = gdf_moored.values[stationMask, 8:51].astype(float)
if len(gdf_moored.loc[stationMask, 'date']) != 0:
pltDat = axes[stationIDX].pcolormesh(gdf_moored.loc[stationMask, 'date'],
depths_moored, np.transpose(V),
shading='auto', vmin=vmin, vmax=vmax, cmap=colormap)
axes[stationIDX].set_ylim(0, 7)
fmt_half_year = mdates.MonthLocator(interval=1)
axes[stationIDX].xaxis.set_major_locator(fmt_half_year)
axes[stationIDX].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
if d == 1:
axes[stationIDX].set_ylabel('Elevation [m]')
axes[stationIDX].set_title(station)
cbar = fig.colorbar(pltDat, ax=axes[stationIDX],
shrink=1.1, aspect=3)
cbar.set_label('Magnitude [m/s]')
pltDat.set_clim([0, 0.2])
plt.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/ADCP_2012.png',
# bbox_inches='tight', dpi=300)
# %% Load in ADCP2 data
# Read in locations
df_adcp2_locs = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/NCP2_ADCP_D1-D3/AQ_LocationsSO_ADCP_ADV_overview_Coords_20150126_AJMR.xlsx',
sheet_name='ADCP')
gdf_adcp2_locs = gp.GeoDataFrame(df_adcp2_locs,
geometry=gp.points_from_xy(df_adcp2_locs.X_NYSPLI, df_adcp2_locs.Y_NYSPLI), crs="EPSG:2263")
gdf_adcp2_locs = gdf_adcp2_locs.to_crs("EPSG:32118")
adcp2_data_path = '//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/'
adcp2_paths = ['NCP2_ADCP_D1-D3/ADCP_D1_070314_090814', 'NCP2_ADCP_D1-D3/ADCP_D2_090914_110214',
'NCP2_ADCP_D1-D3/ADCP_D3_110414_010615', 'NCP2_ADCP_D4-D5/D4_010715_030315',
'NCP2_ADCP_D4-D5/D5_030415_050515']
adcp2_gdfs = dict()
for depIDX, adcp2_path in enumerate(adcp2_paths):
adcp2_files = os.listdir(adcp2_data_path + adcp2_path) # returns list of files in adv folder
for adcp2_file in adcp2_files:
if '.xls' in adcp2_file or '.csv' in adcp2_file:
if '.xls' in adcp2_file:
df_in = pd.read_excel(adcp2_data_path + adcp2_path + '/' + adcp2_file)
else:
df_in = pd.read_csv(adcp2_data_path + adcp2_path + '/' + adcp2_file)
for stationIDX, station in enumerate(df_adcp2_locs['parent_loc_code']):
advStrIDX = adcp2_file.find('_')+1
if adcp2_file[advStrIDX:advStrIDX+3] in station:
adcp2_geo_x = np.ones([len(df_in), 1]) * df_adcp2_locs.X_NYSPLI[stationIDX]
adcp2_geo_y = np.ones([len(df_in), 1]) * df_adcp2_locs.Y_NYSPLI[stationIDX]
df_in['date'] = pd.to_datetime(df_in['Year'].astype(str) + '-' + df_in['Month'].astype(
str).str.zfill(2) + '-' + df_in['Day'].astype(
str).str.zfill(2) + ' ' + df_in['Hour'].astype(
str).str.zfill(2) + ':' + df_in['Minute'].astype(
str).str.zfill(2) + ':' + df_in['Second'].astype(str).str.zfill(2))
gdf_in = gp.GeoDataFrame(
df_in, geometry=gp.points_from_xy(adcp2_geo_x, adcp2_geo_y), crs="EPSG:2263")
if adcp2_file[advStrIDX:advStrIDX + 3] not in adcp2_gdfs:
adcp2_gdfs[adcp2_file[advStrIDX:advStrIDX + 3]] = dict()
if adcp2_file[0:advStrIDX-1] not in adcp2_gdfs[adcp2_file[advStrIDX:advStrIDX + 3]]:
adcp2_gdfs[adcp2_file[advStrIDX:advStrIDX + 3]][adcp2_file[0:advStrIDX - 1]] = dict()
if depIDX+1 not in adcp2_gdfs[adcp2_file[advStrIDX:advStrIDX + 3]][adcp2_file[0:advStrIDX - 1]]:
adcp2_gdfs[adcp2_file[advStrIDX:advStrIDX + 3]][adcp2_file[0:advStrIDX - 1]][depIDX+1] = gdf_in
# %% ADCP2
for stationIDX, stat in enumerate(adcp2_gdfs):
for varIDX, var in enumerate(adcp2_gdfs[stat]):
for depIDX, depdat in enumerate(adcp2_gdfs[stat][var]):
ncDat = adcp2_gdfs[stat][var][depdat].iloc[:, 1:-2]
ncDat.to_xarray().to_netcdf(
'C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/ADCP2/' +
stat + '_' + var + '_d' + str(depdat) + '.nc')
gdf_adcp2_locs.to_csv('C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/ADCP2/ADCP2locations.csv')
# %% Plot ADCP2 Data
fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(9, 8))
fig.tight_layout(pad=2)
colormap = 'jet'
vmin = 0
vmax = 0.2
for stationIDX, stat in enumerate(adcp2_gdfs):
for depIDX, depdat in enumerate(adcp2_gdfs[stat]['mag']):
depths1 = adcp2_gdfs[stat]['mag'][depdat].columns[6:-2] # ADCP sensor depth reading column names to list
depths_moored = np.array([float(d.replace('m', '')) for d in depths1])
V = adcp2_gdfs[stat]['mag'][depdat].values[:, 6:-2].astype(float)
pltDat = axes[stationIDX].pcolormesh(adcp2_gdfs[stat]['mag'][depdat].loc[:, 'date'],
depths_moored, np.transpose(V),
shading='auto', vmin=vmin, vmax=vmax, cmap=colormap)
axes[stationIDX].set_ylim(0, 7)
axes[stationIDX].set_xlim(pd.to_datetime("2014-07-01"), pd.to_datetime('2015-05-15'))
#axes[stationIDX, depIDX].format_xdata = mdates.DateFormatter('%Y-%m')
fmt_half_year = mdates.MonthLocator(interval=1)
axes[stationIDX].xaxis.set_major_locator(fmt_half_year)
axes[stationIDX].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
axes[stationIDX].set_title(str(stat))
axes[stationIDX].set_ylabel('Elevation [m]')
cbar = fig.colorbar(pltDat, ax=axes[stationIDX],
shrink=1.1, aspect=3)
cbar.set_label('Magnitude [m/s]')
pltDat.set_clim([0, 0.2])
plt.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/ADCP_2014.png',
# bbox_inches='tight', dpi=300)
# %% Load in Water Level Data
# Gauge Locations from map
gdf_gaugeLoc = gp.read_file('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/NTC6.kml')
gdf_gaugeLocUSSP = gdf_gaugeLoc.to_crs("EPSG:32118")
# All in Feet NAVD88
df_wl_FFG = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP1_Gauge_Elev_Data_20130521/NC_Gauge_Elev_Data_20130521.xlsx',
sheet_name='Field_Facility_Gauge')
gdf_wl_FFG = gp.GeoDataFrame(df_wl_FFG,
geometry=gp.points_from_xy(np.ones([len(df_wl_FFG), 1]) * gdf_gaugeLoc['geometry'].x[2],
np.ones([len(df_wl_FFG), 1]) * gdf_gaugeLoc['geometry'].y[2]), crs="EPSG:4326")
gdf_wl_FFG.geometry = gdf_wl_FFG.geometry.to_crs("EPSG:32118")
df_wl_NGG1 = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP1_Gauge_Elev_Data_20130521/NC_Gauge_Elev_Data_20130521.xlsx',
sheet_name='National_Grid_Gauge')
gdf_wl_NGG1 = gp.GeoDataFrame(df_wl_NGG1,
geometry=gp.points_from_xy(np.ones([len(df_wl_NGG1), 1]) * gdf_gaugeLoc['geometry'].x[3],
np.ones([len(df_wl_NGG1), 1]) * gdf_gaugeLoc['geometry'].y[3]), crs="EPSG:4326")
gdf_wl_NGG1.geometry = gdf_wl_NGG1.geometry.to_crs("EPSG:32118")
# Also includes temperature
df_wl_NGG2 = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/04 Gauge Data/NCP2_NG_TideGauge_20160113/NCP2_NG_TideGauge_20160113.xlsx',
sheet_name='Sheet1')
gdf_wl_NGG2 = gp.GeoDataFrame(df_wl_NGG2,
geometry=gp.points_from_xy(np.ones([len(df_wl_NGG2), 1]) * gdf_gaugeLoc['geometry'].x[3],
np.ones([len(df_wl_NGG2), 1]) * gdf_gaugeLoc['geometry'].y[3]), crs="EPSG:4326")
gdf_wl_NGG2.geometry = gdf_wl_NGG2.geometry.to_crs("EPSG:32118")
gdf_wl_FFG.to_csv('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/00_ProcessingCode/NetCDF/Gauge/FieldFacility.csv')
gdf_wl_NGG1.to_csv('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/00_ProcessingCode/NetCDF/Gauge/NationalGrid1.csv')
gdf_wl_NGG2.to_csv('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/00_ProcessingCode/NetCDF/Gauge/NationalGrid2.csv')
# %% Plot Water Level Data
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
axes.plot(gdf_wl_FFG.Date_time,
gdf_wl_FFG['Water Surface Elevation (ft)']*0.3048,
label='Field Facility Gauge')
axes.plot(gdf_wl_NGG1.Date_time,
gdf_wl_NGG1['Water Surface Elevation (ft)']*0.3048,
label='National Grid Gauge Deployment 1')
axes.plot(gdf_wl_NGG2.datetime,
gdf_wl_NGG2['water_surface_elevation']*0.3048,
label='National Grid Gauge Deployment 2')
fmt_half_year = mdates.MonthLocator(interval=6)
axes.xaxis.set_major_locator(fmt_half_year)
axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
axes.set_ylabel('Water Surface Elevation [mNAVD88]')
axes.set_title('Water Surface Elevation')
axes.legend()
fig.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/WaterLevel.png',
# bbox_inches='tight', dpi=300)
# %% Load temperature data
df_tdat = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/10_Salinity/tData.xlsx',
skiprows=[1])
# Day Month order is reversed
df_tdat['date'] = pd.to_datetime(df_tdat['CollectionDate'])
df_tsample = pd.read_csv('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/10_Salinity/tSampleLocation.csv')
# Create geodataframe and convert to USSP
gdf_tsample = gp.GeoDataFrame(
df_tsample, geometry=gp.points_from_xy(df_tsample.EastCoordinate, df_tsample.NorthCoordinate), crs="EPSG:2263")
#convert data to CRS of D3D
gdf_tsample.geometry = gdf_tsample.geometry.to_crs("EPSG:32118")
tdat_geo = list()
tdat_facility = list()
for i in range(0, len(df_tdat)):
#sampleID = df_tdat['LocationID'][i][0:df_tdat['LocationID'][i].find('_')]
#geoFind = df_tsample.loc[df_tsample['ParentLocationID'] == sampleID]
geoFind = df_tsample.loc[df_tsample['LocationID'] == df_tdat['LocationID'][i]].geometry.values
if len(geoFind) !=0:
tdat_geo.append(df_tsample.loc[df_tsample['LocationID'] == df_tdat['LocationID'][i]].geometry.values[0])
tdat_facility.append(df_tsample.loc[df_tsample['LocationID'] == df_tdat['LocationID'][i]].SourceArea.values[0])
else:
tdat_geo.append(Point())
tdat_facility.append('')
# Create geodataframe
gdf_tdat = gp.GeoDataFrame(df_tdat, geometry=tdat_geo, crs="EPSG:32118")
gdf_tdat.loc[:, 'SourceArea'] = tdat_facility
gdf_tdat.loc[gdf_tdat.loc[:, 'DepthUnit']=='ft', 'DepthM'] = gdf_tdat.loc[gdf_tdat.loc[:, 'DepthUnit']=='ft', 'BeginDepth']*0.3048
gdf_tdat.loc[gdf_tdat.loc[:, 'DepthUnit']=='cm', 'DepthM'] = gdf_tdat.loc[gdf_tdat.loc[:, 'DepthUnit']=='cm', 'BeginDepth']*0.01
# Import spring observations
df_springDat = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/11_Temperature/NC_Spr2012_Benthic_Water_Quality_FieldData&Observations_20121022.xlsx')
# Create geodataframe and convert to USSP
gdf_springDat = gp.GeoDataFrame(
df_springDat, geometry=gp.points_from_xy(df_springDat.x_coord_as_numeric, df_springDat.y_coord_as_numeric), crs="EPSG:2263")
gdf_springDat.geometry = gdf_springDat.geometry.to_crs("EPSG:32118")
# Import Summer observations
df_summerDat = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/11_Temperature/NC_Sum2012_Benthic_Water_Quality_FieldData&Observations_20130205.xlsx')
# Create geodataframe and convert to USSP
gdf_summerDat = gp.GeoDataFrame(
df_summerDat, geometry=gp.points_from_xy(df_summerDat.x_coord_as_numeric, df_summerDat.y_coord_as_numeric), crs="EPSG:2263")
gdf_summerDat.geometry = gdf_summerDat.geometry.to_crs("EPSG:32118")
# Merged
df_SpringSummerDat = pd.concat([df_springDat, gdf_summerDat], ignore_index=True)
gdf_SpringSummerDat = gp.GeoDataFrame(
df_SpringSummerDat, geometry=gp.points_from_xy(df_SpringSummerDat.x_coord_as_numeric, df_SpringSummerDat.y_coord_as_numeric), crs="EPSG:2263")
gdf_SpringSummerDat.geometry = gdf_SpringSummerDat.geometry.to_crs("EPSG:32118")
# %% Save temperature and salinity data
gdf_SpringSummerDat.to_csv('C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/TempSal/Spring_Summer.csv')
# %% Plot Salinity Time series
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 8))
plotMaskStation = (gdf_tdat.SourceArea == 'Newtown Creek') | (gdf_tdat.SourceArea == 'East Branch of Newtown Creek')
plotMaskWater = (gdf_tdat.SampleMedium == 'Surface Water')
plotMask = plotMaskStation & plotMaskWater
pltDat = axes.scatter(gdf_tdat.loc[plotMask, 'date'],
gdf_tdat.loc[plotMask, 'DepthM'], 10,
gdf_tdat.loc[plotMask, 'NumericResult'])
axes.set_ylim(10, 0)
axes.set_ylabel('Depth below water surface [m]')
axes.set_title('Surface Water Salinity Samples')
axes.set_xlabel('Date')
fmt_half_year = mdates.MonthLocator(interval=12)
axes.xaxis.set_major_locator(fmt_half_year)
axes.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
cbar = fig.colorbar(pltDat, ax=axes, shrink=0.95)
cbar.set_label('Salinity [PSU]')
pltDat.set_clim([5, 40])
fig.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/Salinity.png',
# bbox_inches='tight', dpi=300)
# %% Plot Temperature Time series
## Additional salinity obs are here!
fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(6, 8))
fig.tight_layout(pad=2)
for i, miles in enumerate(np.arange(0, 3, 0.5)):
plotMaskDistance = (gdf_SpringSummerDat.miles_from_NC_mouth > miles) & (
gdf_SpringSummerDat.miles_from_NC_mouth < miles + 0.5)
plotMaskStation = (gdf_SpringSummerDat.subfacility_code == 'Newtown Creek')
plotMaskVar = (gdf_SpringSummerDat.chemical_name == 'Temperature (field)')# Temperature (field)'
plotMask = plotMaskDistance & plotMaskStation & plotMaskVar
pltDat = axes[i].scatter(gdf_SpringSummerDat.loc[plotMask, 'location_start_date'],
gdf_SpringSummerDat.loc[plotMask, 'elev']*0.3048, 10,
gdf_SpringSummerDat.loc[plotMask, 'result_value'])
axes[i].set_ylim(-10, 0)
axes[i].set_ylabel('Elevation [m]')
cbar = fig.colorbar(pltDat, ax=axes[i], shrink=0.95)
cbar.set_label('Temp [degC]')
pltDat.set_clim([5, 30])
fmt_half_year = mdates.MonthLocator(interval=1)
axes[i].xaxis.set_major_locator(fmt_half_year)
axes[i].xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))
axes[0].set_title('Surface Water Temperature Samples')
axes[i].set_xlabel('Date')
fig.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/Temperature.png',
# bbox_inches='tight', dpi=300)
# %% Load in ADV data
# Read in locations
df_adv_locs = pd.read_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/Velocity_Data_Compiled/Phase2/Locations/AQ_LocationsSO_ADCP_ADV_overview_Coords_20150126_AJMR.xlsx',
sheet_name='ADV2')
gdf_adv_locs = gp.GeoDataFrame(df_adv_locs,
geometry=gp.points_from_xy(df_adv_locs.X_NYSPLI, df_adv_locs.Y_NYSPLI), crs="EPSG:2263")
gdf_adv_locs = gdf_adv_locs.to_crs("EPSG:32118")
adv_data_path = '//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/03_Data/02_Physical/05 Currents/Velocity_Data_Compiled/Phase2/Raw'
adv_paths = ['NCP2_ADV_D1-D2/ADV_D1_070914_080214', 'NCP2_ADV_D1-D2/ADV_D2_080214_090814',
'NCP2_ADV_D3-D4/ADV_D3_090914_100614', 'NCP2_ADV_D3-D4/ADV_D4_100714_110214',
'NCP2_ADV_D5-D6/ADV_D5_110414_120214', 'NCP2_ADV_D5-D6/ADV_D6_120414_010615']
adv_gdfs = dict()
adv_dfs = dict()
for depIDX, adv_path in enumerate(adv_paths):
adv_files = os.listdir(adv_data_path + '/' + adv_path) # returns list of files in adv folder
for adv_file in adv_files:
if '.txt' in adv_file and 'Readme' not in adv_file:
df_in = pd.read_csv(adv_data_path + '/' + adv_path + '/' + adv_file, delim_whitespace=True, skipinitialspace=True)
for stationIDX, station in enumerate(gdf_adv_locs['parent_loc_code']):
if adv_file[-9:-6] in station:
# adv_geo_x = np.ones([len(df_in), 1]) * df_adv_locs.X_NYSPLI[stationIDX]
# adv_geo_y = np.ones([len(df_in), 1]) * df_adv_locs.Y_NYSPLI[stationIDX]
df_in['date'] = pd.to_datetime(df_in['Year'].astype(str) + '-' + df_in['Month'].astype(
str).str.zfill(2) + '-' + df_in['Day'].astype(
str).str.zfill(2) + ' ' + df_in['Hour'].astype(
str).str.zfill(2) + ':' + df_in['Minute'].astype(
str).str.zfill(2) + ':' + df_in['Second'].astype(str).str.zfill(2))
df_in.set_index('date', inplace=True)
df_in.drop(columns=['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second'], inplace=True)
df_in.dropna(how='all', axis=1, inplace=True)
colName = df_in.columns
df_in[colName] = df_in[colName].astype('float32')
# Location
if adv_file[-9:-6] not in adv_gdfs:
adv_gdfs[adv_file[-9:-6]] = dict()
adv_dfs[adv_file[-9:-6]] = dict()
# D1-6
if adv_file[-6:-4] not in adv_gdfs[adv_file[-9:-6]]:
adv_gdfs[adv_file[-9:-6]][adv_file[-6:-4]] = gdf_adv_locs.iloc[stationIDX]
adv_dfs[adv_file[-9:-6]][adv_file[-6:-4]] = df_in
else:
adv_gdfs[adv_file[-9:-6]][adv_file[-6:-4]] = gdf_adv_locs.iloc[stationIDX]
adv_dfs[adv_file[-9:-6]][adv_file[-6:-4]].loc[:, colName] = df_in
print('ADV:' + adv_file[-9:-6] +
'; ' + adv_file[-6:-4] +
'; var:' + adv_file[3:6])
# with open('ADV.pickle', 'wb') as f:
# pickle.dump(adv_dfs, f)
# %% Save ADV to NetCDF
for stationIDX, stat in enumerate(adv_dfs):
for depIDX, depdat in enumerate(adv_dfs[stat]):
ncDat = adv_dfs[stat][depdat]
ncDat.columns = ncDat.columns.str.replace(r"[()]", "_")
ncDat.columns = ncDat.columns.str.replace(r"[/]", "_")
ncDat.to_xarray().to_netcdf(
'C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/ADV/' + stat + '_' + depdat + '.nc')
gdf_adv_locs.to_csv('C:/Users/arey/files/Projects/Newtown/DataFigs/NetCDF/ADV/ADVlocations.csv')
# %% Plot ADV Data
fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(9, 8))
fig.tight_layout(pad=2)
for stationIDX, stat in enumerate(adv_dfs):
for depIDX, depdat in enumerate(adv_dfs[stat]):
# adv_dfs[stat][depdat]['vel'].iloc[::60, :].plot(ax=axes[stationIDX])
if 'Velocity' in adv_dfs[stat][depdat].columns:
plotingDat = adv_dfs[stat][depdat].loc[:, 'Velocity'].resample('1s').mean()
else:
plotingDat = adv_dfs[stat][depdat].loc[:, 'Velocity_m_s_'].resample('1s').mean()
axes[stationIDX].plot(plotingDat.index, plotingDat)
# axes[stationIDX].set_ylim(0, 7)
axes[stationIDX].set_xlim(pd.to_datetime("2014-07-01"), pd.to_datetime('2015-02-01'))
# #axes[stationIDX, depIDX].format_xdata = mdates.DateFormatter('%Y-%m')
fmt_half_year = mdates.MonthLocator(interval=1)
axes[stationIDX].xaxis.set_major_locator(fmt_half_year)
axes[stationIDX].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
axes[stationIDX].set_title(str(stat))
#
axes[stationIDX].set_ylabel('ADV Velocity [m/s]')
# cbar.set_label('Velocity Magnitude [m/s]')
# pltDat.set_clim([0, 0.2])
plt.show()
fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/ADV_raw.png',
bbox_inches='tight', dpi=300)
# %% Plot Map
mapbox = 'https://api.mapbox.com/styles/v1/alexander0042/ckemxgtk51fgp19nybfmdcb1e/tiles/256/{z}/{x}/{y}@2x?access_token=pk.eyJ1IjoiYWxleGFuZGVyMDA0MiIsImEiOiJjazVmdG4zbncwMHY4M2VrcThwZGUzZDFhIn0.w6oDHoo1eCeRlSBpwzwVtw'
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
locTableNames = []
locTableX = []
locTableY = []
axes.set_xlim(303500, 306500)
axes.set_ylim(61000, 63750)
gdf.plot(ax=axes, markersize=10, color='blue', label='Mobile ADCP')
for x, y, label in zip(gdf.drop_duplicates(subset="station", keep='last').geometry.x,
gdf.drop_duplicates(subset="station", keep='last').geometry.y,
gdf.drop_duplicates(subset="station", keep='last').station):
axes.annotate(label, xy=(x, y), xytext=(20, 3), textcoords="offset points", color='blue',
bbox=dict(boxstyle="square,pad=0.3", fc="white", ec="k", lw=0))
gdf_SpringSummerDat.plot(ax=axes, markersize=12, color='magenta', label='Temperature & Salinity')
for x, y, label in zip(gdf_SpringSummerDat.drop_duplicates(subset="loc_name", keep='last').geometry.x,
gdf_SpringSummerDat.drop_duplicates(subset="loc_name", keep='last').geometry.y,
gdf_SpringSummerDat.drop_duplicates(subset="loc_name", keep='last').loc_name):
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
gdf_moored_loc.plot(ax=axes, markersize=20, color='red', label='Moored ADCP 2012')
for x, y, label in zip(gdf_moored_loc.drop_duplicates(subset="depCurr", keep='last').geometry.x,
gdf_moored_loc.drop_duplicates(subset="depCurr", keep='last').geometry.y,
gdf_moored_loc.drop_duplicates(subset="depCurr", keep='last').depCurr):
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
gdf_adcp2_locs.plot(ax=axes, markersize=20, color='orange', label='Moored ADCP 2014')
for x, y, label in zip(gdf_adcp2_locs.drop_duplicates(subset="parent_loc_code", keep='last').geometry.x,
gdf_adcp2_locs.drop_duplicates(subset="parent_loc_code", keep='last').geometry.y,
gdf_adcp2_locs.drop_duplicates(subset="parent_loc_code", keep='last').parent_loc_code):
axes.annotate(label, xy=(x, y), xytext=(-65, 3), textcoords="offset points", color='orange',
bbox=dict(boxstyle="square,pad=0.3", fc="white", ec="k", lw=0))
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
gdf_adv_locs.plot(ax=axes, markersize=20, color='green', label='Moored ADV 2014')
for x, y, label in zip(gdf_adv_locs.geometry.x,
gdf_adv_locs.geometry.y,
gdf_adv_locs.parent_loc_code):
axes.annotate(label, xy=(x, y), xytext=(-30, -30), textcoords="offset points", color='green',
bbox=dict(boxstyle="square,pad=0.3", fc="white", ec="k", lw=0))
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
gdf_gaugeLocUSSP.loc[2:3, 'geometry'].plot(ax=axes, markersize=20, color='yellow', label='Water Level Gauge')
for x, y, label in zip(gdf_gaugeLocUSSP.loc[2:3, 'geometry'].x,
gdf_gaugeLocUSSP.loc[2:3, 'geometry'].y,
gdf_gaugeLocUSSP.loc[2:3, 'Name']):
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
for x, y, label in zip(gdf.geometry.x,
gdf.geometry.y,
gdf.station + '_' + gdf.transect.astype(str) + gdf['min'].astype(str) + gdf.second.astype(str)):
locTableNames.append(label)
locTableX.append(x)
locTableY.append(y)
ctx.add_basemap(axes, source=mapbox, crs='EPSG:32118')
axes.set_xlabel('New York State Plane Easting [m]')
axes.set_ylabel('New York State Plane Northing [m]')
axes.legend()
# axes[1].set_xlim(303500, 306500)
# axes[1].set_ylim(61000, 63750)
# gdf_SpringSummerDat.plot(ax=axes[1], markersize=12, color='magenta', label='Temperature & Salinity')
# axes[1].set_xlabel('New York State Plane Easting [m]')
# axes[1].legend()
# ctx.add_basemap(axes[1], source=mapbox, crs='EPSG:32118')
fig.show()
# fig.savefig('C:/Users/arey/files/Projects/Newtown/DataFigs/DataMap_ADV.png',
# bbox_inches='tight', dpi=300)
# %% Import grid shapefile and find cells
delftGrid = gp.read_file('C:/Users/arey/files/Projects/Newtown/Topology data of 2D network.shp')
delftGrid = delftGrid.set_crs("EPSG:32118")
delftGrid['centroid'] = delftGrid.geometry.centroid
obsPts = gp.GeoDataFrame(locTableNames, geometry=gp.points_from_xy(locTableX, locTableY), crs="EPSG:32118")
joinPTS = gp.sjoin(obsPts, delftGrid, op='within')
uniqueJoinPTS = joinPTS.index_right.unique()
groupdObsLabels = joinPTS.groupby(by='index_right').agg({0:lambda x:list(x)})
uniqueDelftGrid = delftGrid.iloc[uniqueJoinPTS, :]
uniqueDelftGrid['Station Names'] = groupdObsLabels
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
axes.set_xlim(303500, 306500)
axes.set_ylim(61000, 63750)
delftGrid.plot(ax=axes, markersize=10, color='gray', label='Mobile ADCP')
delftGrid.loc[uniqueJoinPTS, 'geometry'].plot(ax=axes, markersize=10, color='blue', label='Mobile ADCP')
ctx.add_basemap(axes, source=mapbox, crs='EPSG:32118')
axes.set_xlabel('New York State Plane Easting [m]')
axes.set_ylabel('New York State Plane Northing [m]')
fig.show()
uniqueDelftGrid.to_excel('//srv-ott3/Projects/11934.201 Newtown Creek TPP Privileged and Confidential/06_Models/06_Delft3DFM/RectConnect2_Obs_PTS.xlsx')