# -*- coding: utf-8 -*-
"""
Created on Mon Feb 27 10:12:19 2023

@author: usaied
"""


import datetime
#import calendar
#import math
#import re
import pandas as pd
#import seaborn as sns
import matplotlib.dates as mdates
#import dateutil
import numpy as np
import matplotlib.pyplot as plt
#import statsmodels.api as sm
#from statsmodels.tsa.statespace.sarimax import SARIMAX 
#from statsmodels.tsa.seasonal import STL
#import sklearn
import scipy
#pip install pyextremes

if __name__ == '__main__':
    from pyextremes import get_extremes, get_return_periods
    from pyextremes import EVA
from pyextremes.plotting import plot_extremes


# Read the hourly water-level record and convert the 'Datetime' strings to
# pandas timestamps.
# The first 16 lines of the file are skipped (presumably a metadata header -
# confirm against the input file).
INPUT_CSV = "//srv-oak3.baird.com/Projects/13471.101 Ontario Place Therme/05_Analyses/37_EVA PyExtremes/input_WL.csv"
DATETIME_FORMAT = '%Y-%m-%d %H:%M'

WL_hourly = pd.read_csv(INPUT_CSV, skiprows=16)
# Parse with an explicit format in a single vectorised call. The `date_parser`
# argument of `read_csv` is deprecated since pandas 2.0, and with a
# `strptime`-based parser it fell back to converting the column row by row.
WL_hourly['Datetime'] = pd.to_datetime(WL_hourly['Datetime'], format=DATETIME_FORMAT)


#%% Fix for multiprocessing pool with PyCharm
# Overrides the module-level `__file__` with a fixed script name.
# NOTE(review): presumably needed because `__file__` is missing or unsuitable
# when the script is run from the IDE console and worker processes are
# started - confirm it is still required.
__file__ = 'WL_Extremes.py'


#%% =========Data Processing and preparation================
# Remove rows with no water level value: readings below -900 are treated as
# the missing-data marker. The mask is negated (rather than testing >= -900)
# so that NaN rows are handled only by the explicit dropna() below.
WL_hourly = WL_hourly.loc[~(WL_hourly['WL'] < -900)]
# dropna() returns a new frame, so the result has to be assigned (the bare
# call had no effect). Restricted to the 'WL' column so that gaps in any
# other column cannot discard valid water levels.
WL_hourly = WL_hourly.dropna(subset=['WL'])
# Use the timestamps as the index.
WL_hourly = WL_hourly.set_index('Datetime')
# Put the record on a regular hourly grid; hours without an observation
# become NaN and are then filled by linear interpolation.
WL_hourly = WL_hourly.asfreq('h')
WL_hourly = WL_hourly.interpolate(method='linear')
# Period of record covered by the gridded series.
StartDate = WL_hourly.index.min()
EndDate = WL_hourly.index.max()
Length_of_record = (EndDate - StartDate).days / 365.2425  # in years

# Monthly-mean water levels: average the hourly record per month-end bin,
# linearly fill any month left without a value, and name the column 'WL_MM'.
WaterLevels_MM = (
    WL_hourly
    .resample('M')
    .mean()
    .interpolate(method='linear')
    .rename(columns={'WL': 'WL_MM'})
)

##### Gaussian kernel - develop a continuous variable for EVA #####
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.ndimage` is loaded on every SciPy version.
from scipy.ndimage import gaussian_filter

Length = 30  # averaging length in days
x = np.array(WL_hourly['WL'])
# gaussian_filter truncates the kernel at 4 sigma by default, so with hourly
# samples sigma = Length * 24 h / 4 = Length * 6 makes the kernel reach
# `Length` days on either side of each sample.
y_static = gaussian_filter(x, sigma=Length * 6, order=0)

# Slowly varying ("static") level and the residual short-term surge.
WL_hourly['Static_WL'] = y_static
WL_hourly['Surge'] = WL_hourly['WL'] - WL_hourly['Static_WL']

Max_Surge = WL_hourly['Surge'].max()


###### Peak Over Threshold - Surge Analysis #####
# Single source of truth for the POT settings used by every call below.
# NOTE(review): the constant used to read 0.15 but was never used; the calls
# hard-coded 0.14. It is set to 0.14 here so the computed results do not
# change - confirm which threshold is intended.
Threshold = 0.14  # Surge threshold
Storm_Duration = "48H"  # Hours; passed as the declustering window `r`

# Guarded so the analysis only runs when the file is executed as a script
# (pyextremes is imported under the same guard at the top of the file).
if __name__ == '__main__':
    Surge = get_extremes(WL_hourly.Surge, "POT", threshold=Threshold, r=Storm_Duration)
    model = EVA(WL_hourly.Surge)
    model.get_extremes(method="POT", threshold=Threshold, r=Storm_Duration)
    model.plot_extremes()

    # Fit distribution: by default the distribution is selected automatically
    # as the best between 'genextreme' (GEV) and 'gumbel_r' for 'BM' extremes
    # and 'genpareto' and 'expon' for 'POT' extremes.
    # The best distribution is selected using the AIC metric.
    # To force a distribution instead:
    #   model.fit_model(model='MLE', distribution='genpareto')
    model.fit_model()
    RP_Surge = model.get_summary(
        return_period=[2, 5, 10, 25, 50, 100, 200, 500, 1000],
        alpha=0.95,
        n_samples=500,
    )
    fig, ax = model.plot_diagnostic(alpha=0.95)
    # plt.savefig('Surge_EVA.png')

plt.show()

#%%

def _annual_maxima_eva(series, figure_path):
    """Run an annual-maxima (block maxima) EVA on *series* and save its diagnostic plot.

    Extracts block maxima with a block size of 365.2425 days, fits the model
    with the default distribution selection, computes the return-value
    summary for return periods from 2 to 1000 years (alpha=0.95,
    1000 bootstrap samples) and writes the diagnostic figure to
    *figure_path*.

    Returns a tuple ``(extremes, model, summary, fig, ax)``.
    """
    extremes = get_extremes(series, "BM", block_size="365.2425D")
    eva = EVA(series)
    eva.get_extremes(method="BM", block_size="365.2425D")
    eva.plot_extremes()
    eva.fit_model()
    summary = eva.get_summary(
        return_period=[2, 5, 10, 25, 50, 100, 200, 500, 1000],
        alpha=0.95,
        n_samples=1000,
    )
    diag_fig, diag_ax = eva.plot_diagnostic(alpha=0.95)
    # Save the diagnostic figure explicitly rather than "the current figure".
    diag_fig.savefig(figure_path)
    return extremes, eva, summary, diag_fig, diag_ax


# `get_extremes` and `EVA` are imported only under the `__main__` guard at the
# top of the file, so this section must be guarded too; otherwise any
# re-import of the script (e.g. by multiprocessing workers) fails with
# NameError.
if __name__ == '__main__':
    ######## Annual Maxima EVA (BM) Smoothed Static WL Signal ####
    Static_WL, model, RP_Static, fig, ax = _annual_maxima_eva(
        WL_hourly.Static_WL, 'Static_EVA.png')

    ######## Annual Maxima EVA (BM) Hourly WL Signal ####
    Combined_WL, model, RP_Combined, fig, ax = _annual_maxima_eva(
        WL_hourly.WL, 'Combined_EVA.png')


# Time Series Plot
# Guarded so that the large figure is only rendered and written to disk when
# the file is run as a script, not whenever the module is re-imported.
if __name__ == '__main__':
    fig, ax = plt.subplots(2, figsize=(30, 10), dpi=400)

    # Top panel: hourly water level with the smoothed static level on top.
    ax[0].plot(WL_hourly.index, WL_hourly['WL'], linewidth=0.5, label='Water Level (m IGLD85)')
    ax[0].plot(WL_hourly.index, WL_hourly['Static_WL'], linewidth=1.5, color='black', label='Static Water Level (m IGLD85)')
    ax[0].set_ylabel("Water Level (m IGLD85)", fontsize=10)

    # Bottom panel: surge residual.
    ax[1].plot(WL_hourly.index, WL_hourly['Surge'], linewidth=0.5, label='Surge (m)')
    ax[1].set_ylabel("Surge (m)", fontsize=10)

    # Shared formatting: yearly ticks on 1 January, full period of record,
    # dashed grey grid. This replaces the per-axis copies and the repeated
    # plt.gca() formatter/locator calls, which targeted the bottom panel again.
    for panel in ax:
        panel.legend()
        panel.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
        panel.xaxis.set_major_locator(mdates.YearLocator(1, month=1, day=1))
        panel.set_xlim([StartDate, EndDate])
        panel.grid(color='grey', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    fig.savefig('Water_Level_TS.png')