# -*- coding: utf-8 -*-
"""
Created on Mon Feb 27 10:12:19 2023

@author: usaied

Extreme value analysis (EVA) of an hourly water level record with pyextremes.

The script reads the record from INPUT_CSV, fills gaps by linear interpolation
on an hourly grid, splits the signal into a Gaussian-smoothed "static" level
and a residual surge, then fits extreme value models to:

* the surge, using peaks over threshold (POT);
* the static level, using annual block maxima (BM);
* the hourly water level, using annual block maxima (BM).

Diagnostic figures and a time series figure are saved to the working directory.
"""

import datetime

import numpy as np
import pandas as pd
import scipy
# Explicit submodule import: on older SciPy releases ``import scipy`` alone
# does not make ``scipy.ndimage`` available.
import scipy.ndimage

# pip install pyextremes
# pyextremes and matplotlib are imported inside the functions that use them,
# in the same spirit as the original guarded pyextremes imports.

# Fix for multiprocessing pool with PyCharm
__file__ = 'WL_Extremes.py'

INPUT_CSV = "//srv-oak3.baird.com/Projects/13471.101 Ontario Place Therme/05_Analyses/37_EVA PyExtremes/input_WL.csv"
HEADER_ROWS = 16                     # lines skipped before the CSV column header
DATETIME_FORMAT = '%Y-%m-%d %H:%M'   # format of the 'Datetime' column
MISSING_VALUE_LIMIT = -900           # WL values below this are treated as missing

Length = 30  # 30 days average (Gaussian smoothing length)

# Surge threshold for the POT analysis.
# NOTE(review): the original script declared ``Threshold = 0.15`` but passed a
# hard-coded 0.14 to pyextremes. The effective value (0.14) is kept here so the
# results do not change - confirm which value is intended.
Threshold = 0.14
# Minimum separation between independent surge peaks. Lower-case "h" is used
# because the upper-case hour unit is deprecated in recent pandas releases.
Storm_Duration = "48h"

BLOCK_SIZE = "365.2425D"             # block length for annual maxima
RETURN_PERIODS = [2, 5, 10, 25, 50, 100, 200, 500, 1000]
CONFIDENCE_LEVEL = 0.95


def load_water_levels(path=INPUT_CSV) -> pd.DataFrame:
    """Read the water level CSV and parse its 'Datetime' column.

    Parameters
    ----------
    path : path or file-like object accepted by ``pandas.read_csv``.

    Returns
    -------
    DataFrame with 'Datetime' converted to datetime64 using DATETIME_FORMAT.
    """
    frame = pd.read_csv(path, skiprows=HEADER_ROWS)
    # ``read_csv(date_parser=...)`` is deprecated; converting afterwards with
    # an explicit format is equivalent and works across pandas versions.
    frame['Datetime'] = pd.to_datetime(frame['Datetime'], format=DATETIME_FORMAT)
    return frame


def prepare_hourly(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a gap-free hourly record indexed by 'Datetime'.

    Rows whose 'WL' is below MISSING_VALUE_LIMIT or NaN are removed, the
    remaining rows are placed on a regular hourly grid and the gaps this
    creates are filled by linear interpolation. ``raw`` is not modified.
    """
    flagged_missing = raw['WL'] < MISSING_VALUE_LIMIT
    # The original called ``dropna()`` without keeping the result, so NaN rows
    # were never removed; restricting to 'WL' keeps rows that only lack
    # values in other columns.
    valid = raw.loc[~flagged_missing].dropna(subset=['WL'])
    hourly = valid.set_index('Datetime').asfreq('h')  # generates missing values
    return hourly.interpolate(method='linear')


def monthly_mean(hourly: pd.DataFrame) -> pd.DataFrame:
    """Return monthly mean water levels with 'WL' renamed to 'WL_MM'."""
    # NOTE(review): the 'M' alias is deprecated from pandas 2.2 in favour of
    # 'ME'; it is kept because 'ME' is not available in earlier releases.
    monthly = hourly.resample('M').mean().interpolate(method='linear')
    return monthly.rename(columns={'WL': 'WL_MM'})


def add_static_and_surge(hourly: pd.DataFrame, length_days=Length) -> pd.DataFrame:
    """Add 'Static_WL' and 'Surge' columns to ``hourly`` in place.

    Gaussian kernel - develops a continuous variable for EVA. 'Static_WL' is
    the Gaussian-filtered 'WL' and 'Surge' is the residual 'WL' - 'Static_WL'.

    Parameters
    ----------
    hourly : DataFrame with a 'WL' column on an hourly grid.
    length_days : smoothing length in days.

    Returns
    -------
    The same DataFrame, for convenience.
    """
    levels = np.array(hourly['WL'])
    # The Gaussian kernel is applied over 4 sigma by default, therefore the
    # length in days is multiplied by 24 hr / 4 to obtain sigma in hours.
    hourly['Static_WL'] = scipy.ndimage.gaussian_filter(
        levels, sigma=length_days * 6, order=0)
    hourly['Surge'] = hourly['WL'] - hourly['Static_WL']
    return hourly


def run_eva(series, method, n_samples, figure_path=None, show=False,
            **extremes_kwargs):
    """Extract extremes from ``series``, fit a model and summarise it.

    Parameters
    ----------
    series : pandas Series with a datetime index.
    method : "POT" or "BM", passed to ``EVA.get_extremes``.
    n_samples : number of samples used for the confidence intervals.
    figure_path : if given, the diagnostic figure is saved to this path.
    show : if True, ``plt.show()`` is called after the diagnostic plot.
    **extremes_kwargs : extra arguments for ``EVA.get_extremes``
        (``threshold`` and ``r`` for POT, ``block_size`` for BM).

    Returns
    -------
    (extremes, summary) : the extracted extremes and the return value summary
    for RETURN_PERIODS.
    """
    import matplotlib.pyplot as plt
    from pyextremes import EVA

    model = EVA(series)
    model.get_extremes(method=method, **extremes_kwargs)
    model.plot_extremes()
    # Fit distribution: by default the distribution is selected automatically
    # as best between 'genextreme' (GEV) and 'gumbel_r' for 'BM' extremes and
    # 'genpareto' and 'expon' for 'POT' extremes. The best distribution is
    # selected using the AIC metric.
    model.fit_model()
    summary = model.get_summary(return_period=RETURN_PERIODS,
                                alpha=CONFIDENCE_LEVEL, n_samples=n_samples)
    model.plot_diagnostic(alpha=CONFIDENCE_LEVEL)
    if figure_path is not None:
        plt.savefig(figure_path)
    if show:
        plt.show()
    # The model already holds the extracted extremes, so they are not
    # extracted a second time with the standalone ``get_extremes``.
    return model.extremes, summary


def plot_time_series(hourly: pd.DataFrame, start_date: datetime.datetime,
                     end_date: datetime.datetime,
                     figure_path='Water_Level_TS.png') -> None:
    """Plot water level with static level (top) and surge (bottom), then save.

    Both panels use yearly ticks and are limited to [start_date, end_date].
    """
    import matplotlib.dates as mdates
    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(2, figsize=(30, 10), dpi=400)
    axes[0].plot(hourly.index, hourly['WL'], linewidth=0.5,
                 label='Water Level (m IGLD85)')
    axes[0].plot(hourly.index, hourly['Static_WL'], linewidth=1.5,
                 color='black', label='Static Water Level (m IGLD85)')
    axes[1].plot(hourly.index, hourly['Surge'], linewidth=0.5,
                 label='Surge (m)')

    for axis, y_label in zip(axes, ("Water Level (m IGLD85)", "Surge (m)")):
        axis.legend()
        axis.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        axis.xaxis.set_major_locator(mdates.YearLocator(1, month=1, day=1))
        axis.set_xlim([start_date, end_date])
        axis.set_ylabel(y_label, fontsize=10)
        axis.grid(color='grey', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig(figure_path)


def main() -> dict:
    """Run the full analysis and return its results keyed by their original names."""
    # ========= Data processing and preparation ================
    wl_hourly = prepare_hourly(load_water_levels())
    start_date = wl_hourly.index.min()
    end_date = wl_hourly.index.max()
    length_of_record = (end_date - start_date).days / 365.2425  # in years

    # Monthly average water levels (computed before the derived columns are added)
    water_levels_mm = monthly_mean(wl_hourly)

    add_static_and_surge(wl_hourly)
    max_surge = wl_hourly['Surge'].max()

    # ###### Peak Over Threshold - Surge Analysis #####
    surge, rp_surge = run_eva(wl_hourly.Surge, "POT", n_samples=500, show=True,
                              threshold=Threshold, r=Storm_Duration)

    # ######## Annual Maxima EVA (BM) Smoothed Static WL Signal ####
    static_wl, rp_static = run_eva(wl_hourly.Static_WL, "BM", n_samples=1000,
                                   figure_path='Static_EVA.png',
                                   block_size=BLOCK_SIZE)

    # ######## Annual Maxima EVA (BM) Hourly WL Signal ####
    combined_wl, rp_combined = run_eva(wl_hourly.WL, "BM", n_samples=1000,
                                       figure_path='Combined_EVA.png',
                                       block_size=BLOCK_SIZE)

    # Time Series Plot
    plot_time_series(wl_hourly, start_date, end_date)

    return {
        'WL_hourly': wl_hourly,
        'WaterLevels_MM': water_levels_mm,
        'StartDate': start_date,
        'EndDate': end_date,
        'Length_of_record': length_of_record,
        'Max_Surge': max_surge,
        'Surge': surge,
        'RP_Surge': rp_surge,
        'Static_WL': static_wl,
        'RP_Static': rp_static,
        'Combined_WL': combined_wl,
        'RP_Combined': rp_combined,
    }


# A single guard keeps the whole analysis from re-running when this module is
# imported instead of executed as a script.
if __name__ == '__main__':
    results = main()