# -*- coding: utf-8 -*-
"""Tesi_Monitoring_Sara_Bodo_276110.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1dAqNY-d5pxNRkkcdxlrCrYalQ6kqaerp

#### import and functions
"""

! pip install matplotx[all]
! pip install causalml
! pip install shap

import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras import optimizers
plt.style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
from keras.layers import Input
from keras.layers import Dense

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import ParameterGrid, KFold
from sklearn.model_selection import train_test_split

import datetime
from scipy.stats import ks_2samp
from sklearn.metrics import mean_squared_error, mean_absolute_error
import re 
plt.style.use('seaborn')

from keras.layers import Input
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras import backend as K
from keras import Model
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import os

from google.cloud import storage
import pickle
import pandas as pd
from scipy.stats import ks_2samp
import joblib

from sklearn.metrics import precision_score, \
    recall_score, confusion_matrix, classification_report, \
    accuracy_score, f1_score
import numpy as np

import matplotx
import random

from sklearn import preprocessing
from keras.regularizers import l2

from sklearn.ensemble import ExtraTreesRegressor

# Single seed applied to Python's `random`, TensorFlow and NumPy generators.
seed = 42
random.seed(seed)
tf.random.set_seed(seed)
np.random.seed(seed=seed)

# CSV files read later through `pd.read_csv` (one per dataset variant).
aircon_dataset_filename = "aircon_kl.csv"
electric_dataset_filename = "electric_kl.csv"

# CSV file with the target rates, consumed by `get_target_tassi`.
lista_tassi = "lista_tassi.csv"
# Negative column offset: `get_metrics_to_monitor` inspects `iloc[:, col:]`
# and `get_feature_selection` fits on `iloc[:, :col]`.
col = -4

from sklearn.metrics import confusion_matrix
import sys
# Make modules stored in the Drive notebook folder importable.
sys.path.insert(0,"/content/drive/MyDrive/Colab_Notebooks")

# Drive notebook folder as a string prefix (not referenced in the visible code).
path = "/content/drive/MyDrive/Colab_Notebooks/"

import joblib

from tensorflow import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras import optimizers
plt.style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
from keras.layers import Input
from keras.layers import Dense

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import ParameterGrid, KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import datetime
from scipy.stats import ks_2samp
from sklearn.metrics import mean_squared_error, mean_absolute_error
import re 
plt.style.use('seaborn')

from keras.layers import Input
from keras.layers import Dense
from keras.layers import BatchNormalization
from keras import backend as K
from keras import Model
from sklearn.metrics import mean_squared_error




def get_edith_model(x_train,b):
    """Build the (uncompiled) stacked-LSTM regressor.

    The network is three LSTM layers with ``b``, 8 and 2 units followed by a
    single-unit ``Dense`` output. Every LSTM layer uses a dropout and a
    recurrent dropout of 0.3.

    Args:
        x_train: Array whose ``shape[1]`` and ``shape[2]`` define the input
            shape of the first LSTM layer.
        b: Number of units of the first LSTM layer.

    Returns:
        The ``Sequential`` model; compiling and fitting are left to the caller.
    """
    drop_rate = 0.3
    regularisation = {"dropout": drop_rate, "recurrent_dropout": drop_rate}
    window_shape = (x_train.shape[1], x_train.shape[2])

    network = Sequential()
    # Layers are created and added one at a time, in the original order.
    network.add(LSTM(b, return_sequences=True, input_shape=window_shape, **regularisation))
    network.add(LSTM(8, return_sequences=True, **regularisation))
    network.add(LSTM(2, return_sequences=False, **regularisation))
    network.add(Dense(1))

    return network

def plot_training_loss(history):
    """Plot the training and validation loss curves against the epoch number.

    Args:
        history: Mapping with a ``"loss"`` and a ``"val_loss"`` sequence, one
            value per epoch (the caller passes the ``.history`` of a fit).
    """
    curves = {"Train": history["loss"], "Validation": history["val_loss"]}
    n_epochs = len(curves["Train"])
    # Epochs are numbered from 1 on the x axis.
    epoch_axis = np.linspace(1, n_epochs, n_epochs)

    plt.figure(figsize=(5,5))
    plt.title("Mean squared error")
    for curve_label, curve_values in curves.items():
        sns.lineplot(x=epoch_axis, y=curve_values, label=curve_label, linewidth=3)
    plt.xlabel("Epochs")

    plt.legend()
    plt.show()

# Column names for the aircon dataset. Further down the file they are written
# over the leading columns of the loaded CSV and used as the index of the
# feature-importance series, so the order matters.
features_aircon  = ['mean_Alarm_Persistence_last14d_mean', 'num_temperature_last3d',
       'Air_Conditioning_System_Type_door mounted',
       'mean_OriginalSeverity_last14d_max',
       'mean_Alarm_Persistence_last7d_mean', '75%_Temp',
       'mean_OriginalSeverity_last7d_max', 'tasso_Aircon_last7d', 'uvIndex',
       'pressure_prev14d_std', 'KPI_Delta_Temp_prev14d_std',
       'num_allarmi_g_last3d', 'KPI_Delta_Temp_last14d_std',
       'humidity_prev14d_std', 'precipmm_prev14d_max', 'precipmm_last14d_max',
       'mintempc_prev14d_std', 'mean_Alarm_Persistence_last3d_mean',
       'cloudcover_prev14d_max', 'maxtempc_prev14d_std',
       'humidity_last14d_std', 'cloudcover_last14d_min',
       'pressure_last14d_std', 'mean_OriginalSeverity_last3d_mean',
       'maxtempc_last3d_mean', 'maxtempc_last14d_std',
       'tasso_num_power_last30d', 'windspeedkmph_prev7d_min',
       'mintempc_last14d_std', 'cloudcover_prev14d_std',
       'pressure_prev7d_std']

# Same role as `features_aircon`, for the electric dataset.
features_electric  = ['tasso_num_tickets_last60d', 'tasso_Electric_last30d',
       'tasso_num_mains_last60d', 'Air_Conditioning_System_Type_door mounted',
       'tasso_Electric_last14d', '75%_Temp',
       'mean_OriginalSeverity_last14d_max', 'Location_indoor/shelter',
       'mean_OriginalSeverity_last7d_max', 'Location_indoor',
       'precipmm_last14d_max', 'Location_outdoor',
       'KPI_Delta_Temp_last14d_std', 'pressure_prev14d_std',
       'pressure_last14d_std', 'humidity_last14d_std',
       'KPI_Delta_Temp_prev14d_std', 'mean_Alarm_Persistence_last14d_mean',
       'mintempc_last14d_std', 'cloudcover_last14d_std',
       'mintempc_prev14d_std', 'humidity_prev14d_std', 'tasso_Electric_last7d',
       'mean_OriginalSeverity_last3d_mean', 'maxtempc_last3d_mean',
       'cloudcover_prev14d_std', 'maxtempc_prev14d_std',
       'maxtempc_last14d_std', 'windspeedkmph_prev7d_min',
       'windspeedkmph_last7d_min', 'cloudcover_prev14d_max']

# Permutations of the 31 feature positions (0..30), one per dataset.
# NOTE(review): presumably a precomputed importance ordering of the lists
# above; the visible code only assigns them to `features_importance` and never
# reads them — confirm before relying on their meaning.
electric_feature_importance = [20, 18, 27, 26, 21, 16, 15, 19, 12, 25, 13, 14, 10, 24, 30, 17, 29, 28,  8, 23, 22,  6,  2, 11,  5,  7,  9,  4,  0,  1,  3]
aircon_feature_importance = [30, 23, 11, 22, 25, 29, 19, 28, 16, 20, 13, 17, 10, 15, 12,  9, 24, 14, 21,  6, 18,  4, 26, 27,  8,  0,  7,  3,  5,  1,  2]

def get_target_tassi(lista_tassi, show):
    """Load the target rates and repeat every value six times.

    Args:
        lista_tassi: Path of a CSV file whose first column is the index and
            whose next two columns hold the aircon and electric rates.
        show: When true, print the resulting shapes and plot both series.

    Returns:
        Tuple ``(tassi_aircon, tassi_electric)`` of 1-D arrays. The aircon
        series is built from rows 2..100 of the first value column, the
        electric series from rows 1..100 of the second one.
    """
    rates = pd.read_csv(lista_tassi, index_col=0)

    def _six_copies(values):
        # Every rate is repeated six times, back to back.
        return np.concatenate([[value] * 6 for value in values], axis=0)

    # Aircon: one row is lost to the batch difference, another to an error.
    tassi_aircon = _six_copies(list(rates.iloc[2:101, 0]))
    # Electric: one row is lost to the batch difference.
    tassi_electric = _six_copies(list(rates.iloc[1:101, 1]))

    if show:
        print("SHAPE check: " ,tassi_aircon.shape, " - ",tassi_electric.shape)
        for series in (tassi_aircon, tassi_electric):
            plt.plot(series)
            plt.show()

    return tassi_aircon, tassi_electric

# Plot switch: disables the plots in `get_target_tassi`; the same flag is
# passed to `get_feature_selection` further down.
show = False

# Target series for the two datasets, loaded when the module is executed.
tassi_aircon, tassi_electric = get_target_tassi(lista_tassi, show)

def get_metrics_to_monitor(dataset, col):
    """Select the trailing columns worth monitoring and plot them.

    The mean and standard deviation (rounded to 3 decimals) of the columns
    from position ``col`` onwards are displayed; columns with mean > 0.1 and
    std >= 0.01 are kept and each one is plotted.

    Args:
        dataset: DataFrame holding the candidate metric columns at its end.
        col: Column position (typically negative) where the metrics start.

    Returns:
        List with the names of the columns that satisfy the filter.
    """
    # Rows 1 and 2 of `describe()` are the mean and the standard deviation.
    summary = dataset.iloc[:, col:].describe().iloc[1:3].round(3).T

    print("\n\n Metrics to Monitor\n\n")
    display(summary)

    passes_filter = (summary["mean"] > 0.1) & (summary["std"] >= 0.01)
    selected = summary[passes_filter]
    metrics_to_monitor = selected.index.tolist()
    print("\n\n FILTER [MEAN > 0.1 and STD > 0.01] \n\n")
    display(selected)
    print("\n\n Metrics respecting the condition: ", metrics_to_monitor," \n\n")

    for metric_name in metrics_to_monitor:
        plt.title(metric_name)
        plt.plot(dataset[metric_name])
        plt.show()

    return metrics_to_monitor

def get_feature_selection(dataset, metrics_to_monitor, features, col, show):
    """Keep the ten most important predictors plus the monitored metric(s).

    An ``ExtraTreesRegressor`` with 10 trees is fitted on the columns before
    position ``col`` to predict ``dataset[metrics_to_monitor]``; the ten
    features with the largest importance are retained.

    Args:
        dataset: DataFrame with predictors first and metric columns last.
        metrics_to_monitor: Column label (or labels) used as regression target.
        features: Names used to label the importances; must match, in order,
            the columns selected by ``iloc[:, :col]``.
        col: Column position separating predictors from metrics.
        show: When true, draw a horizontal bar chart of the ten importances.

    Returns:
        DataFrame with the ten selected feature columns followed by the
        monitored metric column(s).
    """
    regressor = ExtraTreesRegressor(n_estimators=10)
    predictors = dataset.iloc[:, :col]
    regressor.fit(predictors, dataset[metrics_to_monitor])

    top_ten = pd.Series(regressor.feature_importances_, index=features).nlargest(10)
    reduced = pd.concat(
        [dataset[list(top_ten.index)], dataset[metrics_to_monitor]],
        axis=1,
    )

    if show:
        top_ten.plot(kind='barh')
        plt.show()

    return reduced

def get_edith_trained(x_train, x_test, y_train, y_test, batch_size, lr, b, epochs, one_to_predict):
    """Build, compile (MSE loss, Adam) and fit the LSTM model.

    Args:
        x_train: Training windows passed to ``fit``.
        x_test: A single window; only used when ``one_to_predict`` is true.
        y_train: Training targets.
        y_test: Unused; kept for interface compatibility.
        batch_size: Batch size for ``fit``.
        lr: Learning rate of the Adam optimizer.
        b: Number of units of the first LSTM layer.
        epochs: Number of training epochs.
        one_to_predict: If true, fit on targets scaled by 0.98 and predict
            the single window ``x_test``. Otherwise fit with a 10% validation
            split, plot the loss curves and the fit on the training set.

    Returns:
        Tuple ``(model, pred)``; ``pred`` is the scalar prediction when
        ``one_to_predict`` is true and an empty list otherwise.
    """
    network = get_edith_model(x_train, b)
    network.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=lr))

    if not one_to_predict:
        fit_result = network.fit(
            x_train,
            y_train,
            batch_size=batch_size,
            epochs=epochs,
            validation_split=0.1,
            verbose=0,
        )
        plot_training_loss(fit_result.history)

        plt.figure(figsize=(20,5))
        plt.plot(y_train, color = "Pink", lw = 2, label = "True")
        plt.plot(network.predict(x_train), color = "black", lw = 1, label = "Prediction")
        plt.show()

        return network, []

    # Single-prediction mode: targets are shrunk by 2% before fitting.
    network.fit(x_train, y_train*.98, batch_size=batch_size, epochs=epochs, verbose=0)
    single_batch = np.expand_dims(x_test, axis=0)
    pred = network.predict(single_batch)[0][0]

    return network, pred

def get_splits(dataset_scaled, lag, step=None):
    """Cut a frame into sliding input windows and their lagged targets.

    All columns but the last are inputs; the last column is the target.
    Sample ``i`` pairs the ``step + 1`` consecutive input rows
    ``i .. i + step`` with the target found ``lag`` rows after the end of
    that window (row ``i + step + lag``).

    Args:
        dataset_scaled: DataFrame with the target in its last column.
        lag: Distance, in rows, between the end of a window and its target.
        step: Window length minus one. When omitted, the module-level
            ``step`` is used, which is how existing two-argument calls work.

    Returns:
        Tuple ``(x_train, y_train)`` with shapes
        ``(n, step + 1, n_inputs)`` and ``(n,)``, where
        ``n = len(dataset_scaled) - step - lag``.

    Raises:
        NameError: If ``step`` is omitted and no module-level ``step`` exists.
        IndexError: If the frame is too short to yield a single sample.
    """
    if step is None:
        # Backward compatibility: earlier callers rely on a global `step`.
        try:
            step = globals()["step"]
        except KeyError:
            raise NameError("name 'step' is not defined") from None

    inputs = dataset_scaled.iloc[:, :-1].to_numpy()
    targets = dataset_scaled.iloc[:, -1].to_numpy()

    n_samples = inputs.shape[0] - step - lag
    if n_samples < 1:
        raise IndexError(
            "dataset has %d rows, too few for step=%d and lag=%d"
            % (inputs.shape[0], step, lag)
        )

    # Build every window once and stack them, instead of re-allocating the
    # whole array with np.concatenate at each iteration.
    x_train = np.stack([inputs[i:i + step + 1] for i in range(n_samples)])
    first_target = step + lag
    y_train = targets[first_target:first_target + n_samples].copy()

    return x_train, y_train

"""#### main"""

# Sorted file names of the input batches and of their targets; the first
# sorted entry of each folder is skipped.
# NOTE(review): the reason for dropping the first entry is not visible here
# (possibly a hidden/checkpoint entry) — confirm against the folder contents.
batches_path = sorted(os.listdir("Batches_14d/"))[1:]
batches_y_path = sorted(os.listdir("Batches_14d_target/"))[1:]

#batches_y_path[0]

# The string literal below is a disabled exploratory snippet; it is evaluated
# as a bare expression and has no effect.
'''df = pd.read_csv("Batches_14d_aircon/"+batches_path[0],index_col=0).reset_index(drop=True)
df_a = df[df["date"] == "2018-01-28"].reset_index(drop=True)
df_a.round(1).T

#.#.T#.drop(columns = 'date')'''

# Same two listings as at the top of this section (repeated notebook cell).
batches_path = sorted(os.listdir("Batches_14d/"))[1:]
batches_y_path = sorted(os.listdir("Batches_14d_target/"))[1:]

# Aircon-specific folders are listed in full (no entry skipped).
batches_path_aircon = sorted(os.listdir("Batches_14d_aircon/"))
batches_y_path_aircon = sorted(os.listdir("Batches_14d_target_aircon/"))

# Show the first file name of each listing.
print(batches_path[0], batches_path_aircon[0])

# Pre-trained model loaded with joblib (pickle-based: only load trusted files).
modello_xgboost = joblib.load('modello.pkl') # aircon, per the original note

import random

def predict(batch, modello_xgboost, y_true):
    """Score the model on a batch under five increasing noise levels.

    For ``i`` in 0..4, Gaussian noise with standard deviation ``0.03 * i`` is
    added to ``batch`` (level 0 adds none), the model predicts on the noisy
    batch and recall, precision and F1 are computed against ``y_true``.

    Args:
        batch: Feature table passed to the model.
        modello_xgboost: Fitted estimator exposing ``predict``.
        y_true: Ground-truth labels aligned with ``batch``.

    Returns:
        Tuple ``(list_recall, list_precision, list_f1)``, each a list with one
        score per noise level.
    """
    scorers = (recall_score, precision_score, f1_score)
    collected = [[] for _ in scorers]

    for noise_level in range(5):
        noisy_batch = batch + np.random.normal(0, 0.03*noise_level, batch.shape)
        y_pred = modello_xgboost.predict(noisy_batch)
        for bucket, scorer in zip(collected, scorers):
            bucket.append(np.mean(scorer(y_true, y_pred)))

    list_recall, list_precision, list_f1 = collected
    return list_recall, list_precision, list_f1

# `jensenshannon` lives in scipy.spatial.distance; the name `distance` was used
# below without ever being imported.
from scipy.spatial import distance


def _js_drift_record(reference, current, bins=1000):
    """Return one Jensen-Shannon distance per column between two batches.

    Each column is summarised by a histogram with ``bins`` bins whose counts
    are divided by the number of rows. Keeping the column loop inside a
    helper also stops its loop variable from overwriting the module-level
    ``col`` offset that is passed to other functions later in the file.
    """
    distances = []
    for position in range(len(reference.columns)):
        reference_values = reference.iloc[:, position]
        current_values = current.iloc[:, position]
        # NOTE(review): each histogram derives its own bin edges from its own
        # data range, so the two histograms are not guaranteed to share bins.
        p = np.histogram(reference_values, bins=bins)[0] / len(reference_values)
        q = np.histogram(current_values, bins=bins)[0] / len(current_values)
        distances.append(distance.jensenshannon(p, q))
    return distances


# One row per (batch i, batch i+1) comparison: the per-column distances
# followed by the recall, precision and F1 lists returned by `predict`.
edith_dataset = []

# Number of rows drawn for every subsampled ("fake") comparison.
window = 30000

for i in range(len(batches_path)-2):  # the last batch has mismatching dimensions

    print(i)

    y_true = pd.read_csv("Batches_14d_target/"+batches_y_path[i+1],index_col=0).reset_index(drop=True).drop(columns = 'date')

    # *************** REAL RECORD ***************

    batch_0 = pd.read_csv("Batches_14d/"+batches_path[i],index_col=0).reset_index(drop=True).drop(columns = 'date')
    batch_1 = pd.read_csv("Batches_14d/"+batches_path[i+1],index_col=0).reset_index(drop=True).drop(columns = 'date')

    record = _js_drift_record(batch_0, batch_1)

    list_recall, list_precision, list_f1 = predict(batch_1, modello_xgboost, y_true)
    record.extend([list_recall, list_precision, list_f1])

    edith_dataset.append(record)

    # *************** FAKE X5 ***************
    # Five extra records from random subsamples of the same pair of batches;
    # the same row positions are taken from both batches and from the labels.

    for _ in range(5):

        rows = random.sample(range(batch_0.shape[0]), window)

        batch_0_aug = batch_0.iloc[rows,:]
        batch_1_aug = batch_1.iloc[rows,:]

        record = _js_drift_record(batch_0_aug, batch_1_aug)

        list_recall, list_precision, list_f1 = predict(batch_1_aug, modello_xgboost, y_true.iloc[rows,0])
        record.extend([list_recall, list_precision, list_f1])

        edith_dataset.append(record)
    

# One row per record; the last three columns hold the recall, precision and F1
# lists (one value per noise level) appended to every record.
e = pd.DataFrame(edith_dataset)
e.round(2)

# Taken before adding new columns so it points at the three list columns,
# whatever the number of feature columns is.
score_columns = list(e.columns[-3:])

# Collapse each list of scores to its median.
for metric_name, list_column in zip(("recall", "precision", "f1"), score_columns):
    e[metric_name] = e[list_column].apply(lambda x : np.median(np.array(x)))

a = e.drop(columns = score_columns)
a

# First difference of every median score, with 0 for the first row.
# The columns are named "recall"/"precision"/"f1" above; reading
# "MED_recall" etc. raised a KeyError.
for metric_name, delta_name in (("recall", "r_0_1"), ("precision", "p_0_1"), ("f1", "f_0_1")):
    reference_mse = np.array(a[metric_name])
    a[delta_name] = np.concatenate([np.zeros(1), reference_mse[1:]-reference_mse[:-1]])

a.to_csv("edith_dataset_kl_electric.csv")

# *********** AIRCON OR ELECTRIC
# Active configuration: the aircon file, target rates and feature names.

name     = aircon_dataset_filename 
tassi    = tassi_aircon
features = features_aircon
features_importance = aircon_feature_importance

# Disabled alternative (electric configuration), kept as an inert string.
'''
name     = electric_dataset_filename 
tassi    = tassi_electric
features = features_electric
features_importance = electric_feature_importance'''

# Load the dataset and rename its leading columns with `features`, keeping the
# original names of the last three columns.
# NOTE(review): assumes the file has exactly len(features) + 3 columns and that
# one of the last three is named "recall" — confirm against the CSV.
dataset  = pd.read_csv(name, index_col=0 )   
dataset.columns = np.append(features, dataset.columns[-3:])
#print("Original shape : ",dataset.shape)

# KPI = target rate * recall, element-wise; appended as a new last column.
# NOTE(review): `tassi` must have as many entries as the dataset has rows, and
# the concat aligns on the index — verify the CSV index is 0..n-1.
KPI      = pd.DataFrame(tassi * dataset["recall"].to_numpy(), columns = ["KPI"])
dataset  = pd.concat([dataset, KPI], axis = 1)
#print("Shape with KPI : ",dataset.shape)

# NOTE(review): relies on the module-level `col` still holding its initial
# value (-4) at this point — verify nothing has reassigned it.
metrics_to_monitor = get_metrics_to_monitor(dataset, col)
#print("\n\nMetrics to monitor : ",metrics_to_monitor, "\n\n")


# *********** CHOOSE THE METRIC TO MONITOR 

# Second metric that passed the filter; which one that is depends on the data.
m = metrics_to_monitor[1]
target = dataset[m]

# From here on `dataset` only holds the selected features plus column `m`.
dataset = get_feature_selection(dataset, m, features, col, show)
#print("Shape after Feat. Reduction : ", dataset.shape)

# One-step change of the target, left-padded with `backward` zeros.
# With backward = 2 the slices are target[1:-1] and target[:-2].
backward = 2                            
trend = np.concatenate([np.zeros(backward), target[1:(-backward+1)].values - target[:-backward].values]) #(-1,-2)
# Row 4 of describe() is the 25% quantile: it is used as the "drop" threshold.
th_drop = pd.DataFrame(trend).describe().iloc[4,0].round(4)
#print("Cum function at 25% of new column to define threshold: ", th_drop)
# Binary flag: 1 where the change is below the threshold, else 0.
new_col   = pd.DataFrame([1 if i < th_drop else 0 for i in trend], columns = ["trend"])

# The flag becomes the first column of the dataset.
dataset = pd.concat([new_col, dataset], axis = 1)
#print("Shape after Artificial Feat. : ", dataset.shape)
#display(dataset.round(2))
#dataset.describe()

# Despite its name, `std_scaler` is a MinMaxScaler.
std_scaler = preprocessing.MinMaxScaler()
dataset_scaled = pd.DataFrame(std_scaler.fit_transform(dataset.values), columns = dataset.columns)
#dataset_scaled.describe().round(2).T

# Save the scaled KPI series to disk.
# NOTE(review): this lookup only works when the chosen metric `m` is "KPI",
# since no other metric column survives the feature selection — confirm.
gt = np.array(dataset_scaled['KPI'])
import numpy
numpy.savetxt('time_series_kpi.gz', gt)

"""#### test"""

# Window length minus one, and the largest forecast lag.
step = 10
max_lag = 5

# Samples [limit:end) form the test set (the last 100 usable windows) and
# everything before `limit` is used for training.
n_rows = dataset_scaled.shape[0]
end = n_rows - step - max_lag
limit = end - 100

# For every lag: x/y test and x/y train arrays under 'x_test_<lag>' etc.
diz_train_test = {}

for lag in range(1, max_lag + 1):
    xtr, ytr = get_splits(dataset_scaled, lag)
    diz_train_test.update({
        f'x_test_{lag}': xtr[limit:end, :, :],
        f'y_test_{lag}': ytr[limit:end],
        f'x_train_{lag}': xtr[:limit, :, :],
        f'y_train_{lag}': ytr[:limit],
    })

list(diz_train_test.keys())

# Hyper-parameters: batch size, learning rate, first-layer units, epochs.
batch_size, lr, b, epochs = 64, 0.001, 32, 40

#************************* LEARNING CURVE
# Lags 2..5: fit with a validation split and plot the loss curves. The
# training arrays are also passed in the (unused) test slots.
for i in range(2, 6):
    x_lag = diz_train_test[f'x_train_{i}']
    y_lag = diz_train_test[f'y_train_{i}']
    model_tmp, _ = get_edith_trained(x_lag, x_lag, y_lag, y_lag,
                                     batch_size, lr, b, epochs, False)

#************************* TRAINING
# One model per lag 1..5, fitted on the full training set and saved to disk.
for i in range(1, 6):
    x_lag = diz_train_test[f'x_train_{i}']
    y_lag = diz_train_test[f'y_train_{i}']

    model_tmp = get_edith_model(x_lag, b)
    model_tmp.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=lr))
    model_tmp.fit(x_lag, y_lag, batch_size=batch_size, epochs=epochs, verbose=0)

    model_tmp.save(f'model_aircon_kpi{i}.pkl')

#************************* reTRAINING
# Lag 3 only is refitted with a smaller batch and a wider first layer,
# overwriting the file saved above.
batch_size, lr, b, epochs = 8, 0.001, 62, 40

i = 3
x_lag = diz_train_test[f'x_train_{i}']
y_lag = diz_train_test[f'y_train_{i}']

model_tmp = get_edith_model(x_lag, b)
model_tmp.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=lr))
model_tmp.fit(x_lag, y_lag, batch_size=batch_size, epochs=epochs, verbose=0)

model_tmp.save(f'model_aircon_kpi{i}.pkl')

# Reload the saved model of every lag.
models = {
    lag: keras.models.load_model(f'model_aircon_kpi{lag}.pkl')
    for lag in range(1, max_lag + 1)
}

models

# Test-set predictions of every lag model.
diz_test_predictions = {
    lag: models[lag].predict(diz_train_test[f'x_test_{lag}'])
    for lag in range(1, max_lag + 1)
}

list(diz_test_predictions.keys())

# True test series against the prediction, one figure per lag.
for i in range(1, max_lag + 1):

    plt.figure(figsize=(20,5))
    plt.plot(diz_train_test[f'y_test_{i}'], color = "Pink", lw = 2, label = "True")
    plt.plot(diz_test_predictions[i], color = "black", lw = 1, label = "Prediction")
    plt.show()

# Mean prediction of every lag model (rounded), later used to order the
# models; lag numbers are recovered as argsort + 1.
weight_model = [
    diz_test_predictions[lag].mean().round(3) for lag in range(1, max_lag + 1)
]
print(weight_model)
print(np.sort(weight_model))
print(np.argsort(weight_model)+1)

# Predictions of every lag model cut to a common window: the slice
# [5+1-lag : 100-lag+1] shifts each series by its lag.
aligned_predictions = {
    lag: diz_test_predictions[lag][5+1-lag:100-lag+1]
    for lag in range(1, max_lag + 1)
}

prediction_list = [aligned_predictions[lag] for lag in range(1, max_lag + 1)]
print(len(prediction_list[0]), '\n\n')
print('prediction_list starts with: ',prediction_list[0].mean().round(3), '\n\n')

# Same series, ordered by increasing mean prediction of the model.
prediction_list_sorted = [
    aligned_predictions[lag] for lag in np.argsort(weight_model)+1
]
print('prediction_list_sorted starts with: ',prediction_list_sorted[0].mean().round(3), '\n\n')

# Weighted ensemble: the model with the lowest mean prediction weighs most.
prediction_avg = np.average(prediction_list_sorted, axis = 0, weights = [40,20,20,10,10])
prediction_avg.mean().round(3)

plt.figure(figsize=(20,5))

for i in range(1, 6):
    if i == 5:
        # The aligned truth is expected to be the same for every lag, so it
        # is drawn once only.
        truth = diz_train_test[f'y_test_{i}'][5+1-i:100-i+1]
        plt.plot(truth, color = "pink", lw = 2, label = "True")
    plt.plot(aligned_predictions[i], color = "blue", lw = 1, ls = '--')
plt.plot(prediction_avg, color = "midnightblue", lw = 3, label = "Prediction" )
plt.xlabel('timestep')
plt.ylabel('KPI')
plt.legend()
plt.show()

# Compare the predicted next-step change with the real one.
i=1
# True aligned series without its last point ("today") and the ensemble
# prediction from the second point on ("tomorrow").
today = diz_train_test['y_test_'+str(i)][5+1-i:100-i+1][:-1]
tomorrow = prediction_avg[1:,0]
delta = tomorrow-today

# Drift threshold: minus one eighth of the standard deviation of the last
# scaled column.
th = -dataset_scaled.iloc[:,-1].std()/8
print(th)
tmp = diz_train_test['y_test_'+str(i)][5+1-i:100-i+1]
delta_real = tmp[1:]-tmp[:-1]

# Points at or below the threshold are highlighted (red: predicted, blue: real).
colors      = ['lightgray' if value>th else 'red' for value in delta  ]
colors_real = ['lightgray' if value>th else 'blue' for value in delta_real]

print(delta.shape)
print(delta_real.shape)

n_points = delta.shape[0]

plt.figure(figsize=(20,5))
plt.scatter(list(range(n_points)),delta, color =  colors, label = 'drift_predicted')
plt.scatter(list(range(n_points)),delta_real, color = colors_real, label = 'real_drift')
# Scalar x-limits (the shape tuple was passed here before).
plt.hlines(y=0, xmin=0, xmax=n_points, linewidth=1, color='black')
plt.hlines(y=th, xmin=0, xmax=n_points, linewidth=1, color='red', ls = '--')
plt.legend()
plt.xlabel('timestep')
plt.ylabel('delta')
plt.show()

print('rosso predicted, blue real')

print(today.shape, tomorrow.shape, delta.shape)

# `labels` guarantees a 2x2 matrix even when one class never occurs, so the
# cm[1, ...] lookups below cannot go out of bounds.
cm = confusion_matrix(delta_real<th, delta<th, labels=[False, True])
print((cm/delta_real.shape[0]).round(2))

recall = cm[1,1]/(cm[1,1]+cm[1,0])
print('\nrecall: ',recall.round(2)) #tp / (tp + fn)

precision = cm[1,1]/(cm[1,1]+cm[0,1])
# Print the precision itself (the recall was printed here by mistake).
print('\nprecision: ',precision.round(2)) #tp / (tp + fp)

# Positions where a drop is predicted.
tmp_index = pd.Series(delta<th)
drop_index = list(tmp_index[tmp_index].index)

# Two overview figures with the predicted drop positions marked on the
# dark-blue series. `i` stays at 3 afterwards.
i = 3
truth_lag = diz_train_test[f'y_test_{i}'][5+1-i:100-i+1]

panels = (
    # (pink series, its label, blue series, its label, y label, legend?)
    (truth_lag, "True", prediction_avg, "Prediction", 'KPI', False),
    (today, "today", tomorrow, "tomorrow", 'Recall', True),
)

for pink_series, pink_label, blue_series, blue_label, y_label, with_legend in panels:
    plt.figure(figsize=(20,5))
    plt.plot(pink_series, color = "Pink", lw = 2, label = pink_label)
    plt.plot(blue_series, color = "midnightblue", lw = 3, label = blue_label )
    plt.scatter(drop_index, blue_series[drop_index], lw = 3)
    if with_legend:
        plt.legend()
    plt.xlabel('timestep')
    plt.ylabel(y_label)
    plt.show()

np.array(drop_index)

#tf.compat.v1.disable_v2_behavior()

import shap

# One gradient explainer per lag, built on that lag's training windows, and
# the SHAP values of that lag's test windows.
explainer_dict = {}
shap_dict = {}

for key in range(1,max_lag+1):
    # Explain the model of the SAME lag: `models[i]` picked whatever value
    # `i` had been left with by earlier code, for every lag.
    explainer_dict[key] = shap.GradientExplainer(models[key], diz_train_test['x_train_'+str(key)])
    # NOTE(review): `[0]` assumes shap_values returns a list with one array
    # per model output — confirm for the installed shap version.
    shap_dict[key] = explainer_dict[key].shap_values(diz_train_test['x_test_'+str(key)])[0]

#display(explainer_dict)
#display(shap_dict)

# Sanity check: raw test targets of every lag (shape, head, tail) ...
for i in range(1,6):
    print(diz_train_test['y_test_'+str(i)].shape,
          diz_train_test['y_test_'+str(i)][1:10].round(2),
          diz_train_test['y_test_'+str(i)][-10:].round(2))
print()
# ... and the same after the lag alignment slice.
for i in range(1,6):
    print(diz_train_test['y_test_'+str(i)][max_lag+1-i:100-i+1].shape,
          diz_train_test['y_test_'+str(i)][max_lag+1-i:100-i+1][1:10].round(2),
          diz_train_test['y_test_'+str(i)][max_lag+1-i:100-i+1][-10:].round(2))

#for i in range(1,6):
    #print(shap_dict[i].shape)

import matplotlib

# For each inspected test position, rank the features by SHAP value across
# the lag models and plot the input shift of the top-ranked ones.
for punto in [78,79,80]:  # alternatively: drop_index

    print(punto,'*'*20)

    # feature name -> list of [rank, shift] pairs, one pair per lag model
    dict_output = {key: [] for key in dataset_scaled.columns[1:11]}

    for ii in range(1,6):

        aligned = range(5+1-ii,100-ii+1)  # same lag alignment as the predictions
        shap_test = shap_dict[ii][aligned,:,:]

        # SHAP values of the point (first input column dropped), shifted by
        # the global minimum; row label 10 is the last timestep of the window.
        shap_imp = pd.DataFrame(shap_test[punto,:,1:]-shap_test.min(), columns = dataset.columns[1:-1])
        # Column positions ordered by increasing SHAP value (+1 skips column 0).
        top_feat_shap = np.argsort(list(shap_imp.loc[10]))+1

        kl_i = diz_train_test['x_test_'+str(ii)][aligned,:,:]
        # Shift of each feature: window mean minus the last value of the window.
        top_kl = [np.mean(kl_i[punto,:,j]) - kl_i[punto,:,j][-1] for j in top_feat_shap]

        for i, key in enumerate(dataset.columns[top_feat_shap]):
            dict_output[key].append([ i, top_kl[i].round(2)])

    # Collapse the per-lag pairs: summed rank and mean shift of every feature.
    for k,v in dict_output.items():
        tmp = np.stack(v).astype(float)
        dict_output[k] = [np.sum(tmp[:,0]),np.mean(tmp[:,1])]

    # Order features by decreasing summed rank. Sorting the items directly
    # keeps features that share the same rank sum; re-keying a dict by the
    # rank sum silently dropped all but one of them.
    ranked = sorted(dict_output.items(), key=lambda item: item[1][0], reverse=True)
    index_sorted = np.array([scores[0] for _, scores in ranked])
    feature_names = np.array([feature for feature, _ in ranked])
    kl_values = np.array([scores[1] for _, scores in ranked], dtype=float)

    for i,j,k in zip(index_sorted, feature_names, kl_values):
        print(k.astype(float).round(2),'\t',i,'\t',j)

    # Cumulative share of the absolute shift; round the result itself (the
    # rounding used to hit the denominator only).
    abs_shift = np.abs(kl_values)
    cumulative_share = np.cumsum(abs_shift/np.sum(abs_shift))
    print(cumulative_share.round(2))

    # First position where the cumulative share reaches 60%.
    soglia_60 = np.where(cumulative_share >= 0.6)[0][0]
    selected = slice(0, soglia_60+1)

    intensity = (abs_shift[selected]/np.sum(abs_shift[selected])).round(2)

    plt.figure(figsize = (7,2))
    # plt.get_cmap works across Matplotlib versions; matplotlib.cm.get_cmap
    # has been removed from recent releases.
    plt.barh(feature_names[selected], kl_values[selected], height = 0.4, color = plt.get_cmap('Blues')(intensity+.5))
    plt.xlim((-.4,.4))
    plt.show()

