import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import pymc3 as pm

sns.set_style("whitegrid")

# Widen the notebook container so the large figures render comfortably
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
# Compare the shape of the Gumbel (extreme value) distribution with the Normal
np.random.seed(123)
n = 100000
fs = {"fontsize": 16}

plt.figure(figsize=(20, 12))
sns.kdeplot(stats.gumbel_r().rvs(n), label="Gumbel")
sns.kdeplot(stats.norm().rvs(n), label="Normal")
plt.legend(**fs)
plt.xticks(**fs)
plt.yticks(**fs)
plt.ylabel("Density", **fs)
plt.xlabel("X", **fs);
df = pd.read_csv(
    "/Users/conormcdonald/Downloads/archive-5/DailyDelhiClimateTrain.csv",
    parse_dates=["date"],
)
# Monthly block maxima of wind speed; keep only months whose maximum exceeds 5
block_maxima_month = (
    df
    .set_index("date")
    .resample("M")
    .max()
    .wind_speed
    .reset_index()
    .query("wind_speed > 5")
)
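# Not in the original notebook: a quick look at how many monthly block maxima
# survive the filter and their range, before plotting and modelling them.
print(block_maxima_month.shape)
print(block_maxima_month.wind_speed.describe())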

fs = {"fontsize" : 16}
plt.figure(figsize=(24, 10))
plt.scatter(df.date, df.wind_speed, alpha=.3)
plt.plot(df.date, df.wind_speed, alpha=.7, c="b")
plt.scatter(block_maxima_month.date, block_maxima_month.wind_speed, c="red", s=80)
plt.xticks(**fs)
plt.yticks(**fs)
plt.ylabel("Wind speed", **fs)
plt.xlabel("Date", **fs);
# Distribution of the monthly block maxima
# (histplot replaces the deprecated distplot)
plt.figure(figsize=(24, 10))
sns.histplot(block_maxima_month.wind_speed, kde=True, stat="density")
plt.xticks(**fs)
plt.yticks(**fs)
plt.ylabel("Density", **fs)
plt.xlabel("Wind speed", **fs);
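# Not in the original notebook: as a quick sanity check before the Bayesian
# fits, a frequentist maximum-likelihood Gumbel fit with scipy gives a rough
# idea of where the location and scale parameters should land. The variable
# names below are illustrative only.
mle_loc, mle_scale = stats.gumbel_r.fit(block_maxima_month.wind_speed)
print(f"MLE Gumbel fit: loc={mle_loc:.2f}, scale={mle_scale:.2f}")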
# Gumbel (extreme value) likelihood for the monthly block maxima
ev_model = pm.Model()

with ev_model:

    loc = pm.Normal("loc", 5, 10)
    scale = pm.HalfCauchy("scale", 1)

    lik = pm.Gumbel("lik", loc, scale, observed=block_maxima_month.wind_speed)

    ev_trace = pm.sample()


# Normal likelihood with the same priors, as a baseline
norm_model = pm.Model()

with norm_model:

    loc = pm.Normal("loc", 5, 10)
    scale = pm.HalfCauchy("scale", 1)

    lik = pm.Normal("lik", loc, scale, observed=block_maxima_month.wind_speed)

    norm_trace = pm.sample()


# Student-t likelihood as a heavier-tailed alternative
t_model = pm.Model()

with t_model:

    loc = pm.Normal("loc", 5, 10)
    scale = pm.HalfCauchy("scale", 1)
    nu = pm.Exponential("nu", 20)

    lik = pm.StudentT("lik", nu, loc, scale, observed=block_maxima_month.wind_speed)

    t_trace = pm.sample()
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [scale, loc]
100.00% [8000/8000 00:02<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 3 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [scale, loc]
100.00% [8000/8000 00:02<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 3 seconds.
The acceptance probability does not match the target. It is 0.8800705684725215, but should be close to 0.8. Try to increase the number of tuning steps.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [nu, scale, loc]
100.00% [8000/8000 00:02<00:00 Sampling 4 chains, 0 divergences]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 3 seconds.
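# Not part of the original notebook: one possible way to rank the three fits
# is ArviZ's LOO-based model comparison. This is a sketch that assumes arviz
# is installed alongside PyMC3; attaching the model when converting each trace
# stores the pointwise log-likelihoods that az.compare needs.
import arviz as az

idata = {
    "gumbel": az.from_pymc3(ev_trace, model=ev_model),
    "normal": az.from_pymc3(norm_trace, model=norm_model),
    "student_t": az.from_pymc3(t_trace, model=t_model),
}
az.compare(idata, ic="loo")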
p1 = pm.traceplot(ev_trace, figsize=(14, 7))
p2 = pm.traceplot(norm_trace, figsize=(14, 7));
pm.plot_posterior(ev_trace)
pm.plot_posterior(norm_trace)
ev_post = pm.sample_posterior_predictive(ev_trace, model=ev_model)["lik"].T
norm_post = pm.sample_posterior_predictive(norm_trace, model=norm_model)["lik"].T
t_post = pm.sample_posterior_predictive(t_trace, model=t_model)["lik"].T
100.00% [4000/4000 00:01<00:00]
100.00% [4000/4000 00:03<00:00]
100.00% [4000/4000 00:03<00:00]

# Compare posterior predictive densities with the observed block maxima
plt.figure(figsize=(20, 10))
pm.plot_dist(ev_post, label="Gumbel", textsize=20)
pm.plot_dist(norm_post, label="Normal", textsize=20)
sns.kdeplot(block_maxima_month.wind_speed, label="Actual")
plt.legend(fontsize=14);
def calculate_exceedance_probability(exceed_values, posterior):
    """For each threshold, summarise the posterior distribution of the
    probability that a monthly maximum exceeds it (mean plus 1% and 99%
    quantiles)."""
    n = posterior.shape[1]
    ix = []
    probs_m = []
    probs_lb = []
    probs_ub = []
    for i in exceed_values:
        p = (posterior > i).sum(1) / n
        probs_m.append(p.mean())
        probs_lb.append(np.quantile(p, 0.01))
        probs_ub.append(np.quantile(p, 0.99))
        ix.append(i)
    return pd.DataFrame(dict(exceed_value=ix, ep_mean=probs_m, ep_lb=probs_lb, ep_ub=probs_ub))

exceedance = calculate_exceedance_probability(list(range(1, 60)), ev_post)
# Posterior exceedance curve: mean with the 1%-99% interval shaded
plt.figure(figsize=(20, 12))
plt.plot(exceedance.exceed_value, exceedance.ep_mean, c="r", label="ev")
plt.fill_between(exceedance.exceed_value, exceedance.ep_lb, exceedance.ep_ub, color="b", alpha=.3)
plt.xticks(**fs)
plt.yticks(**fs)
plt.ylabel("Exceedance probability", **fs)
plt.xlabel("Wind speed", **fs)
plt.legend(**fs);
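# Not in the original notebook: since the blocks are monthly maxima, an
# exceedance probability p translates to an approximate return period of
# 1/p months, i.e. how often a monthly maximum above that wind speed would
# be expected. A rough sketch:
exceedance["return_period_months"] = 1 / exceedance.ep_mean.clip(lower=1e-6)
print(exceedance.loc[exceedance.exceed_value.isin([20, 30, 40]),
                     ["exceed_value", "ep_mean", "return_period_months"]])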