# COMMAND ----------

#@title Imports and Global Variables (make sure to run this cell)  { display-mode: "form" }

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass


from __future__ import absolute_import, division, print_function


#@markdown This sets the warning status (default is `ignore`, since this notebook runs correctly)
warning_status = "ignore" #@param ["ignore", "always", "module", "once", "default", "error"]
import warnings
warnings.filterwarnings(warning_status)
with warnings.catch_warnings():
    warnings.filterwarnings(warning_status, category=DeprecationWarning)
    warnings.filterwarnings(warning_status, category=UserWarning)

import numpy as np
import os
#@markdown This sets the styles of the plotting (default is styled like plots from [FiveThirtyeight.com](https://fivethirtyeight.com/)
matplotlib_style = 'fivethirtyeight' #@param ['fivethirtyeight', 'bmh', 'ggplot', 'seaborn', 'default', 'Solarize_Light2', 'classic', 'dark_background', 'seaborn-colorblind', 'seaborn-notebook']
import matplotlib.pyplot as plt; plt.style.use(matplotlib_style)
import matplotlib.axes as axes;
from matplotlib.patches import Ellipse
#%matplotlib inline
import seaborn as sns; sns.set_context('notebook')
from IPython.core.pylabtools import figsize
#@markdown This sets the resolution of the plot outputs (`retina` is the highest resolution)
notebook_screen_res = 'retina' #@param ['retina', 'png', 'jpeg', 'svg', 'pdf']
#%config InlineBackend.figure_format = notebook_screen_res

import tensorflow as tf

import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

class _TFColor(object):
    """Enum of colors used in TF docs."""
    red = '#F15854'
    blue = '#5DA5DA'
    orange = '#FAA43A'
    green = '#60BD68'
    pink = '#F17CB0'
    brown = '#B2912F'
    purple = '#B276B2'
    yellow = '#DECF3F'
    gray = '#4D4D4D'
    def __getitem__(self, i):
        return [
            self.red,
            self.orange,
            self.green,
            self.blue,
            self.pink,
            self.brown,
            self.purple,
            self.yellow,
            self.gray,
        ][i % 9]
TFColor = _TFColor()

def session_options(enable_gpu_ram_resizing=True, enable_xla=False):
    """
    Allowing the notebook to make use of GPUs if they're available.

    XLA (Accelerated Linear Algebra) is a domain-specific compiler for linear
    algebra that optimizes TensorFlow computations.
    """
    config = tf.config
    gpu_devices = config.experimental.list_physical_devices('GPU')
    if enable_gpu_ram_resizing:
        for device in gpu_devices:
           tf.config.experimental.set_memory_growth(device, True)
    if enable_xla:
        config.optimizer.set_jit(True)
    return config

session_options(enable_gpu_ram_resizing=True, enable_xla=True)

# COMMAND ----------

# MAGIC %md #### TFP: Run the Data Prep

# COMMAND ----------

import matplotlib.pyplot as plt
import tensorflow_probability as tfp
import tensorflow as tf
tfd = tfp.distributions
tfb = tfp.bijectors

# COMMAND ----------

c_data = tf.constant([4,5,4,3,0,1,3,4,4,3,3,7,12,9,8,9,8,11,6,9,7,6,13,7,9,13,13,12,11,11,], dtype=tf.float32)
print(c_data)

batch_size = np.float32(int(tf.shape(c_data)[0]))
year = np.float32(tf.shape(c_data))
years = np.arange(0,year)
print(batch_size)
print(year)
print(len(years))
mu_c_data = tf.reduce_mean(c_data)
print(mu_c_data)
alpha = (1.0/mu_c_data)
print(alpha)

# COMMAND ----------

# Visualizing Data
plt.figure(figsize=(12.5, 4))
plt.bar(years, np.array(c_data), color="#5DA5DA")
plt.xlabel("Time (days)")
plt.ylabel("Sales")
plt.title("Did the rate change?")

# COMMAND ----------

# MAGIC %md The following code illustrates how to calculate switching patterns shown here https://www.tensorflow.org/probability/examples/Bayesian_Switchpoint_Analysis

# COMMAND ----------

# MAGIC %md #### Basic Model

# COMMAND ----------

def disaster_count_model(disaster_rate_fn):
    disaster_count = tfd.JointDistributionNamed(dict(
    e=tfd.Exponential(rate=alpha),
    l=tfd.Exponential(rate=alpha),
    s=tfd.Uniform(0., high=len(years)),
    d_t=lambda s, l, e: tfd.Independent(
        tfd.Poisson(rate=disaster_rate_fn(np.arange(len(years)), s, l, e)),reinterpreted_batch_ndims=1)
    ))
    return disaster_count

def disaster_rate_switch(ys, s, l, e):
    return tf.where(ys < s, e, l)

def disaster_rate_sigmoid(ys, s, l, e):
    return e + tf.sigmoid(ys - s) * (l - e)

model_switch = disaster_count_model(disaster_rate_switch)
model_sigmoid = disaster_count_model(disaster_rate_sigmoid)

def target_log_prob_fn(model, s, e, l):
    return model.log_prob(s=s, e=e, l=l, d_t=c_data)

models = [model_switch, model_sigmoid]

# COMMAND ----------

num_results = 10000
num_burnin_steps = 3000

@tf.function(autograph=False, jit_compile=True)
def make_chain(target_log_prob_fn):
    kernel = tfp.mcmc.TransformedTransitionKernel(
       inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=target_log_prob_fn,
          step_size=0.05,
          num_leapfrog_steps=3),
       bijector=[
          # The switchpoint is constrained between zero and len(years).
          # Hence we supply a bijector that maps the real numbers (in a
          # differentiable way) to the interval (0;len(yers))
          tfb.Sigmoid(low=0., high=tf.cast(len(years), dtype=tf.float32)),
          # Early and late disaster rate: The exponential distribution is
          # defined on the positive real numbers
          tfb.Softplus(),
          tfb.Softplus(),
      ])

    kernel = tfp.mcmc.SimpleStepSizeAdaptation(
        inner_kernel=kernel,
        num_adaptation_steps=int(0.8*num_burnin_steps))

    states = tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=num_burnin_steps,
      current_state=[
          # The three latent variables
          tf.ones([], name='init_switchpoint'),
          tf.ones([], name='init_early_disaster_rate'),
          tf.ones([], name='init_late_disaster_rate'),
      ],
      trace_fn=None,
      kernel=kernel)
    return states

switch_samples = [s.numpy() for s in make_chain(
    lambda *args: target_log_prob_fn(model_switch, *args))]
sigmoid_samples = [s.numpy() for s in make_chain(
    lambda *args: target_log_prob_fn(model_sigmoid, *args))]

switchpoint, early_disaster_rate, late_disaster_rate = zip(
    switch_samples, sigmoid_samples)

# COMMAND ----------

def _desc(v):
    return '(med: {}; 95%: $[{}, {}]$)'.format(
      *np.round(np.percentile(v, [50, 2.5, 97.5]), 1))

for t, v in [
    ('Early disaster rate ($e$) posterior samples', early_disaster_rate),
    ('Late disaster rate ($l$) posterior samples', late_disaster_rate),
    ('Switch point ($s$) posterior samples', years[0] + switchpoint),
]:
    fig, ax = plt.subplots(nrows=1, ncols=2, sharex=True)
    for (m, i) in (('Switch', 0), ('Sigmoid', 1)):
        a = ax[i]
        a.hist(v[i], bins=50)
        a.axvline(x=np.percentile(v[i], 50), color='k')
        a.axvline(x=np.percentile(v[i], 2.5), color='k', ls='dashed', alpha=.5)
        a.axvline(x=np.percentile(v[i], 97.5), color='k', ls='dashed', alpha=.5)
        a.set_title(m + ' - ' + _desc(v[i]))
    fig.suptitle(t)
    plt.show()

# COMMAND ----------

og = pd.Series([np.round(x,0) for x in switchpoint[0]])
og_length = len(og)
summary = og.value_counts().sort_values(axis='index')
stats = pd.DataFrame({"count": summary})
stats.reset_index(inplace=True)
stats = stats.rename(columns={'index': 'day'})
stats['total'] = stats['count'].sum()
stats['probability'] = stats['count']/stats['total']
stats['probabiltiy_rank'] = stats['probability'].rank(method='max',ascending=False)
stats_out = stats[stats['probabiltiy_rank'] <= 3]
# print(stats)
print(stats_out)

# COMMAND ----------

# MAGIC %md #### Batched Model

# COMMAND ----------

print(len([4,5,4,3,0,1,3,4,4,3,3,7,12,9,8,9,8,11,6,9,7,6,13,7,9,13,13,12,11,11]))
print(len([9,11,9,13,6,12,6,12,8,7,12,9,6,8,7,4,0,2,3,3,0,1,5,4,1,3,3,4,4,0]))

# COMMAND ----------

# Defining our Data and assumptions
c_data_batched = tf.constant([
 [4,5,4,3,0,1,3,4,4,3,3,7,12,9,8,9,8,11,6,9,7,6,13,7,9,13,13,12,11,11,],
 [9,11,9,13,6,12,6,12,8,7,12,9,6,8,7,4,0,2,3,3,0,1,5,4,1,3,3,4,4,0,],
], dtype=tf.float32)


batch_size = np.float32(int(tf.shape(c_data_batched)[0]))
year = np.float32(tf.shape(c_data_batched)[1])
years = np.arange(0,year)
print(batch_size)
print(len(years))
print(c_data_batched)
mu_c_data = tf.reduce_mean(c_data_batched,1)
print(mu_c_data)
alpha = (1.0/mu_c_data)
print(alpha)
print(tf.size(c_data_batched))

# COMMAND ----------

def describe_distributions(distributions):
    print('\n'.join([str(d) for d in distributions]))

distributions_below = [
    tfd.Exponential(rate=alpha)
    , tfd.Independent(tfd.Uniform(low=np.float32(np.zeros(int(batch_size))), 
                                    high=np.float32(np.repeat(year, batch_size))), reinterpreted_batch_ndims=0)
    , tfd.Independent(tfd.Uniform(low=np.float32(np.zeros(int(batch_size))), 
                                    high=np.float32(np.repeat(year, batch_size))), reinterpreted_batch_ndims=1)
    ,tfd.Independent(tfd.Poisson(rate=mu_c_data), reinterpreted_batch_ndims=0)
    ,tfd.Independent(tfd.Poisson(rate=mu_c_data), reinterpreted_batch_ndims=1)
]

describe_distributions(distributions_below)

# COMMAND ----------

print(np.float32(np.repeat(len(years), batch_size)))
print(np.float32(np.zeros(int(batch_size))))

# COMMAND ----------

def disaster_count_model(disaster_rate_fn):
    disaster_count = tfd.JointDistributionNamed(
      dict(
        e=tfd.Independent(tfd.Exponential(rate=alpha)),
        l=tfd.Independent(tfd.Exponential(rate=alpha)),
        s=tfd.Uniform(low=0., high=len(years)),
        d_t=lambda s, l, e: tfd.Independent(tfd.Poisson(rate=disaster_rate_fn(np.arange(len(years)), s, l, e)), reinterpreted_batch_ndims=1))
    )
    return disaster_count

def disaster_rate_switch(ys, s, l, e):
      return tf.where(ys < s, e, l)

def disaster_rate_sigmoid(ys, s, l, e):
      return e + tf.sigmoid(ys - s) * (l - e)

model_switch = disaster_count_model(disaster_rate_switch)
model_sigmoid = disaster_count_model(disaster_rate_sigmoid)

def target_log_prob_fn(model, s, e, l):
      return model.log_prob(s=s, e=e, l=l, d_t=c_data_batched)

models = [model_switch, model_sigmoid]

# COMMAND ----------

num_results = 5000
num_burnin_steps = 1500

@tf.function(autograph=False, jit_compile=True)
def make_chain(target_log_prob_fn):
   kernel = tfp.mcmc.TransformedTransitionKernel(
       inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
          target_log_prob_fn=target_log_prob_fn,
          step_size=0.05,
          num_leapfrog_steps=3),
       bijector=[
          # The switchpoint is constrained between zero and len(years).
          # Hence we supply a bijector that maps the real numbers (in a
          # differentiable way) to the interval (0;len(yers))
          tfb.Sigmoid(low=0., high=tf.cast(len(years), dtype=tf.float32)),
          # Early and late disaster rate: The exponential distribution is
          # defined on the positive real numbers
          tfb.Softplus(),
          tfb.Softplus(),
      ])
   kernel = tfp.mcmc.SimpleStepSizeAdaptation(
        inner_kernel=kernel,
        num_adaptation_steps=int(0.8*num_burnin_steps))

   states = tfp.mcmc.sample_chain(
      num_results=num_results,
      num_burnin_steps=num_burnin_steps,
      current_state=[
          # The three latent variables
          tf.ones([], name='init_switchpoint'),
          tf.ones([], name='init_early_disaster_rate'),
          tf.ones([], name='init_late_disaster_rate'),
      ],
      trace_fn=None,
      kernel=kernel)
   return states

switch_samples = [s.numpy() for s in make_chain(
    lambda *args: target_log_prob_fn(model_switch, *args))]
sigmoid_samples = [s.numpy() for s in make_chain(
    lambda *args: target_log_prob_fn(model_sigmoid, *args))]

switchpoint, early_disaster_rate, late_disaster_rate = zip(
    switch_samples, sigmoid_samples)

# COMMAND ----------

def _desc(v):
  return '(med: {}; 95% CI: $[{}, {}]$)'.format(
      *np.round(np.percentile(v, [50, 2.5, 97.5]), 2))

for t, v in [
    ('Early disaster rate ($e$) posterior samples', early_disaster_rate),
    ('Late disaster rate ($l$) posterior samples', late_disaster_rate),
    ('Switch point ($s$) posterior samples', years[0] + switchpoint),
]:
  fig, ax = plt.subplots(nrows=1, ncols=2, sharex=True)
  for (m, i) in (('Switch', 0), ('Sigmoid', 1)):
    a = ax[i]
    a.hist(v[i], bins=50)
    a.axvline(x=np.percentile(v[i], 50), color='k')
    a.axvline(x=np.percentile(v[i], 2.5), color='k', ls='dashed', alpha=.5)
    a.axvline(x=np.percentile(v[i], 97.5), color='k', ls='dashed', alpha=.5)
    a.set_title(m + ' model ' + _desc(v[i]))
  fig.suptitle(t)
  plt.show()

# COMMAND ----------

og = pd.Series([np.round(x,0) for x in switchpoint[0]])
og_length = len(og)
summary = og.value_counts().sort_values(axis='index')
stats = pd.DataFrame({"count": summary})
stats.reset_index(inplace=True)
stats = stats.rename(columns={'index': 'day'})
stats['total'] = stats['count'].sum()
stats['probability'] = stats['count']/stats['total']
stats['probabiltiy_rank'] = stats['probability'].rank(method='max',ascending=False)
stats_out = stats[stats['probabiltiy_rank'] <= 3]
# print(stats)
print(stats_out)

# COMMAND ----------