## for data
import numpy as np
import pandas as pd
## for plotting
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
## for machine learning
from sklearn import preprocessing, cluster
import scipy
## for simple routing
import osmnx as ox  #1.2.2
## for advanced routing
from ortools.constraint_solver import pywrapcp  #9.6
from ortools.constraint_solver import routing_enums_pb2

# Load the request log and keep only the columns used by the rest of the script.
dtf = pd.read_csv(r'/Users/vittorioguglielmoglave/PycharmProjects/Thesis/Updated_temp.csv')
columns_kept = ["New Request", "Lat", "Lng", "Time Interval"]
dtf = dtf[columns_kept].reset_index(drop=True)
# Expose the positional index as an 'id' column that identifies each point.
# Plot convention hint from the author: Latitude=Y axis and Longitude=X axis.
dtf = dtf.reset_index().rename(columns={"index": "id"})
dtf.head()

# The row whose 'id' equals i is the depot: it gets base=1, every other row base=0.
i = 0
dtf["base"] = (dtf["id"] == i).astype("int64")
# [Lat, Lng] of the depot (first row flagged as base)
start = dtf.loc[dtf["base"] == 1, ["Lat", "Lng"]].to_numpy()[0]

############################################ CLUSTERING BY CONSIDERING ONLY KNOWN CUSTOMERS AT t=0 ######################################################
# X holds only the customers (depot excluded) already known at t=0.
# One combined mask avoids the reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] == 0), ["Lat", "Lng"]]
################################# FIND THE RIGHT NUMBER K OF CLUSTERS ######################################
max_k = 7
## K-Means inertia for every candidate cluster count 1..max_k
## (never more clusters than available points)
distortions = []
for n_clusters in range(1, min(max_k, len(X)) + 1):
    model = cluster.KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    model.fit(X)
    distortions.append(model.inertia_)
## best k: position of the lowest second difference of the inertia curve.
## Entry j of np.diff(distortions, 2) is centred on distortions[j + 1], i.e. on
## j + 2 clusters, so the position is shifted by 2 to become a cluster count
## (used unshifted it is off by two and can even be 0).
## NOTE(review): an elbow is usually taken at the LARGEST second difference; the
## original "lowest" criterion is kept here - confirm it is intended.
second_diff = np.diff(distortions, 2)
# with fewer than three candidates there is no second difference: fall back to 1
k = int(np.argmin(second_diff)) + 2 if second_diff.size else 1

## plot
fig, ax = plt.subplots()
ax.plot(range(1, len(distortions)+1), distortions)
ax.axvline(k, ls='--', color="red", label="k = "+str(k))
ax.set(title='The Elbow Method', xlabel='Number of clusters',
       ylabel="Distortion")
ax.legend()
ax.grid(True)

plt.show()

########################################## DIVIDE LOCATIONS IN K CLUSTERS ######################################
# Fixed number of clusters for t=0 (replaces whatever value k held before).
k = 4
# Seeded and with an explicit restart count, like the models of the elbow
# search, so that the partition (and the routes built on it) is reproducible.
model = cluster.KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0)
# Customers (depot excluded) already known at t=0; one combined mask avoids the
# reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] == 0), ["Lat", "Lng"]]

dtf_X = X.copy()
dtf_X["cluster"] = model.fit_predict(X)

# Assignment aligns on the index: rows that are not in X get NaN as cluster.
dtf["cluster"] = dtf_X["cluster"]
dtf.sample(5)


## plot
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# scatter of the points coloured by cluster (first k colours of the custom palette)
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata,k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k='+str(k)+'): known customers at time t=0')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# depot marker
ax.scatter(start[0], start[1], c='black', marker='^')
plt.show()


# create the drivable street network within 10000 m of the depot and annotate
# every edge with a speed and a travel time
G = ox.graph_from_point(start, dist=10000,network_type="drive")
G = ox.add_edge_speeds(G)
G = ox.add_edge_travel_times(G)

## get the graph node for each location (both depot and customers)
# One vectorised lookup (X = longitudes, Y = latitudes) instead of one call per
# row; columns are addressed by label because positional access on a
# label-indexed Series (x[1], x[0]) is deprecated in pandas.
dtf["node"] = ox.nearest_nodes(G, dtf["Lng"].to_numpy(), dtf["Lat"].to_numpy())
# Locations that snap to a node already taken by an earlier row are dropped.
dtf = dtf.drop_duplicates("node", keep='first')
dtf.head()

## this function computes the cost of the shortest path between two graph nodes
def shortest_distance(a, b, weight='travel_time'):
    """Return the weighted shortest-path length from node ``a`` to node ``b`` in ``G``.

    Parameters
    ----------
    a, b : graph node identifiers (source and target).
    weight : edge attribute minimised by Dijkstra. The default, 'travel_time',
        keeps the original behaviour, so despite the function name the result
        is a travel time unless another attribute (e.g. 'length') is requested.

    Returns
    -------
    The path cost, or ``np.nan`` when ``b`` cannot be reached from ``a`` or one
    of the nodes is not in the graph.
    """
    try:
        return nx.shortest_path_length(G, source=a, target=b, method='dijkstra', weight=weight)
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Only the "no route" cases are mapped to NaN; any other error is a
        # real problem and is allowed to propagate instead of being hidden.
        return np.nan


############################################# ROUTING WITH ONLY KNOWN CUSTOMERS AT t=0 #############################################################
# Solve one single-vehicle routing problem per cluster with OR-Tools.
# These accumulators are shared by all clusters: do not reset them in the loop.
dic_routes_clusters = {}   # cluster label -> route expressed as dtf 'id' values
total_distance = 0
total_load = 0

# Loop-invariant parameters
driver = 1                 # vehicles per cluster
driver_capacity = [100]    # capacity per vehicle (every customer has demand 1)
max_distance = 100000      # upper bound on the cumulated arc cost of one route
# graph node closest to the depot coordinates
start_node = ox.nearest_nodes(G, start[1], start[0])

for clust in range(k):
    # Depot first, then the customers of this cluster, so that matrix
    # position p always corresponds to lst_id_nodes[p].
    depot_rows = dtf[dtf["base"] == 1]
    cluster_rows = dtf[dtf["cluster"] == clust]
    lst_for_dist_matrix = depot_rows["node"].tolist() + cluster_rows["node"].tolist()
    lst_id_nodes = depot_rows["id"].tolist() + cluster_rows["id"].tolist()

    # matrix position -> 'id' in dtf, needed to translate the solver's route
    dict_route_nodes = dict(enumerate(lst_id_nodes))

    # Pairwise cost matrix (shortest_distance yields NaN when it finds no path).
    distance_matrix = np.asarray(
        [[shortest_distance(a, b) for b in lst_for_dist_matrix] for a in lst_for_dist_matrix],
        dtype=float)
    # Casting NaN to int is undefined, so unreachable pairs are priced just
    # above the route limit: the Distance dimension then forbids those arcs.
    distance_matrix = np.where(np.isnan(distance_matrix), max_distance + 1, distance_matrix)
    # int values are needed for the routing model
    distance_matrix = np.rint(distance_matrix).astype(int)

    print("start node:", start_node, "| total locations to visit in the cluster:", len(lst_for_dist_matrix) - 1,
          "| drivers:", driver, "\n")
    demands = [0] + [1] * (len(lst_for_dist_matrix) - 1)

    # Create the routing index manager and the routing model
    manager = pywrapcp.RoutingIndexManager(len(lst_for_dist_matrix), driver, lst_for_dist_matrix.index(start_node))
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        """Return the arc cost between two routing indices as a plain int."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(distance_matrix[from_node][to_node])

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)

    # Define cost of each arc.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # The constraint about capacity
    def get_demand(from_index):
        """Return the demand of the location behind a routing index."""
        # demands is ordered by matrix position, not by routing index
        return demands[manager.IndexToNode(from_index)]

    demand = routing.RegisterUnaryTransitCallback(get_demand)
    routing.AddDimensionWithVehicleCapacity(demand, slack_max=0,
                                            vehicle_capacities=driver_capacity,
                                            fix_start_cumul_to_zero=True,
                                            name='Capacity')

    # The constraint about distance
    name = 'Distance'
    routing.AddDimension(transit_callback_index, slack_max=0, capacity=max_distance,
                         fix_start_cumul_to_zero=True, name=name)
    distance_dimension = routing.GetDimensionOrDie(name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # First solution by cheapest arc, then improved by guided local search.
    parameters = pywrapcp.DefaultRoutingSearchParameters()
    parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    parameters.local_search_metaheuristic = (routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    # The search stops after 1 second and returns the best route found so far,
    # which is not necessarily optimal.
    parameters.time_limit.FromSeconds(1)
    solution = routing.SolveWithParameters(parameters)
    if solution is None:
        # No feasible route (e.g. route limit exceeded or unreachable customer):
        # report it and go on with the next cluster instead of crashing.
        print("no solution found for cluster", clust, "\n")
        continue

    # Walk the solution from the start index to the end index.
    index = routing.Start(0)
    route_idx = []
    route_distance = 0
    route_load = 0
    while not routing.IsEnd(index):
        node_index = manager.IndexToNode(index)
        route_idx.append(node_index)
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += distance_callback(previous_index, index)
        route_load += demands[node_index]
    # close the tour on the end node
    route_idx.append(manager.IndexToNode(index))
    # route_idx holds matrix positions; my_route is the same route as dtf 'id' values
    my_route = [dict_route_nodes[x] for x in route_idx]
    print(my_route)
    dic_routes_clusters[clust] = my_route
    # NOTE(review): the arc costs come from shortest_distance, whose default
    # edge weight is 'travel_time'; the "/ 1000 ... km" conversion below assumes
    # lengths in metres - confirm the intended unit.
    print(f'distance: {round(route_distance / 1000, 2)} km')
    print(f'load: {round(route_load, 2)}', "\n")
    total_distance += route_distance
    total_load += route_load

print(f'Total distance: {round(total_distance / 1000, 2)} km')
print(f'Total load: {total_load}')

################################################# ROUTING REPRESENTATION FOR ALL CLUSTERS AT t=0 #############################################
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# Scatter plot for clusters
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata, k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k=' + str(k) + '): routing representation at t=0')

# Add the start point (depot)
ax.scatter(start[0], start[1], c='black', marker='^')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# Draw every route as a dashed polyline through its stops, in visiting order.
# Only the routes are needed, and the loop variable must not be called k:
# that would overwrite the global number of clusters.
for route_ids in dic_routes_clusters.values():
    route_coordinates = dtf.loc[route_ids, ["Lat", "Lng"]]
    ax.plot(route_coordinates["Lat"], route_coordinates["Lng"], linestyle='--', color='black')

plt.show()

############################################## NEW CLUSTERING WITH NEW CUSTOMERS ARRIVED AT t=1 ##################################
# X holds only the customers (depot excluded) known up to t=1.
# One combined mask avoids the reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 2), ["Lat", "Lng"]]
################################# FIND THE RIGHT NUMBER K OF CLUSTERS ######################################
max_k = 7
# K-Means inertia for every candidate cluster count 1..max_k
# (never more clusters than available points)
distortions = []
for n_clusters in range(1, min(max_k, len(X)) + 1):
    model = cluster.KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    model.fit(X)
    distortions.append(model.inertia_)
# best k: position of the lowest second difference of the inertia curve.
# Entry j of np.diff(distortions, 2) is centred on distortions[j + 1], i.e. on
# j + 2 clusters, so the position is shifted by 2 to become a cluster count
# (used unshifted it is off by two and can even be 0, which K-Means rejects).
# NOTE(review): an elbow is usually taken at the LARGEST second difference; the
# original "lowest" criterion is kept here - confirm it is intended.
second_diff = np.diff(distortions, 2)
# with fewer than three candidates there is no second difference: fall back to 1
k = int(np.argmin(second_diff)) + 2 if second_diff.size else 1

# plot
fig, ax = plt.subplots()
ax.plot(range(1, len(distortions)+1), distortions)
ax.axvline(k, ls='--', color="red", label="k = "+str(k))
ax.set(title='The Elbow Method', xlabel='Number of clusters',
       ylabel="Distortion")
ax.legend()
ax.grid(True)

plt.show()

########################################## DIVIDE LOCATIONS IN K CLUSTERS ######################################
# Seeded and with an explicit restart count, like the models of the elbow
# search, so that the partition (and the routes built on it) is reproducible.
model = cluster.KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0)
# Customers (depot excluded) known up to t=1; one combined mask avoids the
# reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 2), ["Lat", "Lng"]]

dtf_X = X.copy()
dtf_X["cluster"] = model.fit_predict(X)

# Assignment aligns on the index: rows that are not in X get NaN as cluster.
dtf["cluster"] = dtf_X["cluster"]
dtf.sample(5)


# plot
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# scatter of the points coloured by cluster (first k colours of the custom palette)
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata,k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k='+str(k)+'): known customers at time t=1')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# depot marker
ax.scatter(start[0], start[1], c='black', marker='^')
plt.show()

############################################ ROUTING WITH NEW CUSTOMERS AT t=1 #######################################
# Solve one single-vehicle routing problem per cluster with OR-Tools.
# These accumulators are shared by all clusters: do not reset them in the loop.
dic_routes_clusters = {}   # cluster label -> route expressed as dtf 'id' values
total_distance = 0
total_load = 0

# Loop-invariant parameters
driver = 1                 # vehicles per cluster
driver_capacity = [100]    # capacity per vehicle (every customer has demand 1)
max_distance = 100000      # upper bound on the cumulated arc cost of one route
# graph node closest to the depot coordinates
start_node = ox.nearest_nodes(G, start[1], start[0])

for clust in range(k):
    # Depot first, then the customers of this cluster, so that matrix
    # position p always corresponds to lst_id_nodes[p].
    depot_rows = dtf[dtf["base"] == 1]
    cluster_rows = dtf[dtf["cluster"] == clust]
    lst_for_dist_matrix = depot_rows["node"].tolist() + cluster_rows["node"].tolist()
    lst_id_nodes = depot_rows["id"].tolist() + cluster_rows["id"].tolist()

    # matrix position -> 'id' in dtf, needed to translate the solver's route
    dict_route_nodes = dict(enumerate(lst_id_nodes))

    # Pairwise cost matrix (shortest_distance yields NaN when it finds no path).
    distance_matrix = np.asarray(
        [[shortest_distance(a, b) for b in lst_for_dist_matrix] for a in lst_for_dist_matrix],
        dtype=float)
    # Casting NaN to int is undefined, so unreachable pairs are priced just
    # above the route limit: the Distance dimension then forbids those arcs.
    distance_matrix = np.where(np.isnan(distance_matrix), max_distance + 1, distance_matrix)
    # int values are needed for the routing model
    distance_matrix = np.rint(distance_matrix).astype(int)

    print("start node:", start_node, "| total locations to visit in the cluster:", len(lst_for_dist_matrix) - 1,
          "| drivers:", driver, "\n")
    demands = [0] + [1] * (len(lst_for_dist_matrix) - 1)

    # Create the routing index manager and the routing model
    manager = pywrapcp.RoutingIndexManager(len(lst_for_dist_matrix), driver, lst_for_dist_matrix.index(start_node))
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        """Return the arc cost between two routing indices as a plain int."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(distance_matrix[from_node][to_node])

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)

    # Define cost of each arc.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # The constraint about capacity
    def get_demand(from_index):
        """Return the demand of the location behind a routing index."""
        # demands is ordered by matrix position, not by routing index
        return demands[manager.IndexToNode(from_index)]

    demand = routing.RegisterUnaryTransitCallback(get_demand)
    routing.AddDimensionWithVehicleCapacity(demand, slack_max=0,
                                            vehicle_capacities=driver_capacity,
                                            fix_start_cumul_to_zero=True,
                                            name='Capacity')

    # The constraint about distance
    name = 'Distance'
    routing.AddDimension(transit_callback_index, slack_max=0, capacity=max_distance,
                         fix_start_cumul_to_zero=True, name=name)
    distance_dimension = routing.GetDimensionOrDie(name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # First solution by cheapest arc, then improved by guided local search.
    parameters = pywrapcp.DefaultRoutingSearchParameters()
    parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    parameters.local_search_metaheuristic = (routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    # The search stops after 1 second and returns the best route found so far,
    # which is not necessarily optimal.
    parameters.time_limit.FromSeconds(1)
    solution = routing.SolveWithParameters(parameters)
    if solution is None:
        # No feasible route (e.g. route limit exceeded or unreachable customer):
        # report it and go on with the next cluster instead of crashing.
        print("no solution found for cluster", clust, "\n")
        continue

    # Walk the solution from the start index to the end index.
    index = routing.Start(0)
    route_idx = []
    route_distance = 0
    route_load = 0
    while not routing.IsEnd(index):
        node_index = manager.IndexToNode(index)
        route_idx.append(node_index)
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += distance_callback(previous_index, index)
        route_load += demands[node_index]
    # close the tour on the end node
    route_idx.append(manager.IndexToNode(index))
    # route_idx holds matrix positions; my_route is the same route as dtf 'id' values
    my_route = [dict_route_nodes[x] for x in route_idx]
    print(my_route)
    dic_routes_clusters[clust] = my_route
    # NOTE(review): the arc costs come from shortest_distance, whose default
    # edge weight is 'travel_time'; the "/ 1000 ... km" conversion below assumes
    # lengths in metres - confirm the intended unit.
    print(f'distance: {round(route_distance / 1000, 2)} km')
    print(f'load: {round(route_load, 2)}', "\n")
    total_distance += route_distance
    total_load += route_load

print(f'Total distance: {round(total_distance / 1000, 2)} km')
print(f'Total load: {total_load}')

################################################# ROUTING REPRESENTATION FOR ALL CLUSTERS WITH NEW CUSTOMERS AT t=1 #############################################
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# Scatter plot for clusters
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata, k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k=' + str(k) + '): routing representation at t=1')

# Add the start point (depot)
ax.scatter(start[0], start[1], c='black', marker='^')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# Draw every route as a dashed polyline through its stops, in visiting order.
# Only the routes are needed, and the loop variable must not be called k:
# that would overwrite the global number of clusters.
for route_ids in dic_routes_clusters.values():
    route_coordinates = dtf.loc[route_ids, ["Lat", "Lng"]]
    ax.plot(route_coordinates["Lat"], route_coordinates["Lng"], linestyle='--', color='black')

plt.show()

############################################## NEW CLUSTERING WITH NEW CUSTOMERS ARRIVED AT t=2 ##################################
# X holds only the customers (depot excluded) known up to t=2.
# One combined mask avoids the reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 3), ["Lat", "Lng"]]
################################# FIND THE RIGHT NUMBER K OF CLUSTERS ######################################
max_k = 7
# K-Means inertia for every candidate cluster count 1..max_k
# (never more clusters than available points)
distortions = []
for n_clusters in range(1, min(max_k, len(X)) + 1):
    model = cluster.KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    model.fit(X)
    distortions.append(model.inertia_)
# best k: position of the lowest second difference of the inertia curve.
# Entry j of np.diff(distortions, 2) is centred on distortions[j + 1], i.e. on
# j + 2 clusters, so the position is shifted by 2 to become a cluster count
# (used unshifted it is off by two and can even be 0, which K-Means rejects).
# NOTE(review): an elbow is usually taken at the LARGEST second difference; the
# original "lowest" criterion is kept here - confirm it is intended.
second_diff = np.diff(distortions, 2)
# with fewer than three candidates there is no second difference: fall back to 1
k = int(np.argmin(second_diff)) + 2 if second_diff.size else 1

# plot
fig, ax = plt.subplots()
ax.plot(range(1, len(distortions)+1), distortions)
ax.axvline(k, ls='--', color="red", label="k = "+str(k))
ax.set(title='The Elbow Method', xlabel='Number of clusters',
       ylabel="Distortion")
ax.legend()
ax.grid(True)

plt.show()

########################################## DIVIDE LOCATIONS IN K CLUSTERS ######################################
# Seeded and with an explicit restart count, like the models of the elbow
# search, so that the partition (and the routes built on it) is reproducible.
model = cluster.KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0)
# Customers (depot excluded) known up to t=2; one combined mask avoids the
# reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 3), ["Lat", "Lng"]]

dtf_X = X.copy()
dtf_X["cluster"] = model.fit_predict(X)

# Assignment aligns on the index: rows that are not in X get NaN as cluster.
dtf["cluster"] = dtf_X["cluster"]
dtf.sample(5)


# plot
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# scatter of the points coloured by cluster (first k colours of the custom palette)
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata,k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k='+str(k)+'): known customers at time t=2')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# depot marker
ax.scatter(start[0], start[1], c='black', marker='^')
plt.show()

############################################ ROUTING WITH NEW CUSTOMERS AT t=2 #######################################
# Solve one single-vehicle routing problem per cluster with OR-Tools.
# These accumulators are shared by all clusters: do not reset them in the loop.
dic_routes_clusters = {}   # cluster label -> route expressed as dtf 'id' values
total_distance = 0
total_load = 0

# Loop-invariant parameters
driver = 1                 # vehicles per cluster
driver_capacity = [100]    # capacity per vehicle (every customer has demand 1)
max_distance = 100000      # upper bound on the cumulated arc cost of one route
# graph node closest to the depot coordinates
start_node = ox.nearest_nodes(G, start[1], start[0])

for clust in range(k):
    # Depot first, then the customers of this cluster, so that matrix
    # position p always corresponds to lst_id_nodes[p].
    depot_rows = dtf[dtf["base"] == 1]
    cluster_rows = dtf[dtf["cluster"] == clust]
    lst_for_dist_matrix = depot_rows["node"].tolist() + cluster_rows["node"].tolist()
    lst_id_nodes = depot_rows["id"].tolist() + cluster_rows["id"].tolist()

    # matrix position -> 'id' in dtf, needed to translate the solver's route
    dict_route_nodes = dict(enumerate(lst_id_nodes))

    # Pairwise cost matrix (shortest_distance yields NaN when it finds no path).
    distance_matrix = np.asarray(
        [[shortest_distance(a, b) for b in lst_for_dist_matrix] for a in lst_for_dist_matrix],
        dtype=float)
    # Casting NaN to int is undefined, so unreachable pairs are priced just
    # above the route limit: the Distance dimension then forbids those arcs.
    distance_matrix = np.where(np.isnan(distance_matrix), max_distance + 1, distance_matrix)
    # int values are needed for the routing model
    distance_matrix = np.rint(distance_matrix).astype(int)

    print("start node:", start_node, "| total locations to visit in the cluster:", len(lst_for_dist_matrix) - 1,
          "| drivers:", driver, "\n")
    demands = [0] + [1] * (len(lst_for_dist_matrix) - 1)

    # Create the routing index manager and the routing model
    manager = pywrapcp.RoutingIndexManager(len(lst_for_dist_matrix), driver, lst_for_dist_matrix.index(start_node))
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        """Return the arc cost between two routing indices as a plain int."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(distance_matrix[from_node][to_node])

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)

    # Define cost of each arc.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # The constraint about capacity
    def get_demand(from_index):
        """Return the demand of the location behind a routing index."""
        # demands is ordered by matrix position, not by routing index
        return demands[manager.IndexToNode(from_index)]

    demand = routing.RegisterUnaryTransitCallback(get_demand)
    routing.AddDimensionWithVehicleCapacity(demand, slack_max=0,
                                            vehicle_capacities=driver_capacity,
                                            fix_start_cumul_to_zero=True,
                                            name='Capacity')

    # The constraint about distance
    name = 'Distance'
    routing.AddDimension(transit_callback_index, slack_max=0, capacity=max_distance,
                         fix_start_cumul_to_zero=True, name=name)
    distance_dimension = routing.GetDimensionOrDie(name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # First solution by cheapest arc, then improved by guided local search.
    parameters = pywrapcp.DefaultRoutingSearchParameters()
    parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    parameters.local_search_metaheuristic = (routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    # The search stops after 1 second and returns the best route found so far,
    # which is not necessarily optimal.
    parameters.time_limit.FromSeconds(1)
    solution = routing.SolveWithParameters(parameters)
    if solution is None:
        # No feasible route (e.g. route limit exceeded or unreachable customer):
        # report it and go on with the next cluster instead of crashing.
        print("no solution found for cluster", clust, "\n")
        continue

    # Walk the solution from the start index to the end index.
    index = routing.Start(0)
    route_idx = []
    route_distance = 0
    route_load = 0
    while not routing.IsEnd(index):
        node_index = manager.IndexToNode(index)
        route_idx.append(node_index)
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += distance_callback(previous_index, index)
        route_load += demands[node_index]
    # close the tour on the end node
    route_idx.append(manager.IndexToNode(index))
    # route_idx holds matrix positions; my_route is the same route as dtf 'id' values
    my_route = [dict_route_nodes[x] for x in route_idx]
    print(my_route)
    dic_routes_clusters[clust] = my_route
    # NOTE(review): the arc costs come from shortest_distance, whose default
    # edge weight is 'travel_time'; the "/ 1000 ... km" conversion below assumes
    # lengths in metres - confirm the intended unit.
    print(f'distance: {round(route_distance / 1000, 2)} km')
    print(f'load: {round(route_load, 2)}', "\n")
    total_distance += route_distance
    total_load += route_load

print(f'Total distance: {round(total_distance / 1000, 2)} km')
print(f'Total load: {total_load}')

################################################# ROUTING REPRESENTATION FOR ALL CLUSTERS WITH NEW CUSTOMERS AT t=2 #############################################
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# Scatter plot for clusters
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata, k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k=' + str(k) + '): routing representation at t=2')

# Add the start point (depot)
ax.scatter(start[0], start[1], c='black', marker='^')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# Draw every route as a dashed polyline through its stops, in visiting order.
# Only the routes are needed, and the loop variable must not be called k:
# that would overwrite the global number of clusters.
for route_ids in dic_routes_clusters.values():
    route_coordinates = dtf.loc[route_ids, ["Lat", "Lng"]]
    ax.plot(route_coordinates["Lat"], route_coordinates["Lng"], linestyle='--', color='black')

plt.show()

############################################## NEW CLUSTERING WITH NEW CUSTOMERS ARRIVED AT t=3 ##################################
# X holds only the customers (depot excluded) known up to t=3.
# One combined mask avoids the reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 4), ["Lat", "Lng"]]
################################# FIND THE RIGHT NUMBER K OF CLUSTERS ######################################
max_k = 7
# K-Means inertia for every candidate cluster count 1..max_k
# (never more clusters than available points)
distortions = []
for n_clusters in range(1, min(max_k, len(X)) + 1):
    model = cluster.KMeans(n_clusters=n_clusters, init='k-means++', max_iter=300, n_init=10, random_state=0)
    model.fit(X)
    distortions.append(model.inertia_)
# best k: position of the lowest second difference of the inertia curve.
# Entry j of np.diff(distortions, 2) is centred on distortions[j + 1], i.e. on
# j + 2 clusters, so the position is shifted by 2 to become a cluster count
# (used unshifted it is off by two and can even be 0, which K-Means rejects).
# NOTE(review): an elbow is usually taken at the LARGEST second difference; the
# original "lowest" criterion is kept here - confirm it is intended.
second_diff = np.diff(distortions, 2)
# with fewer than three candidates there is no second difference: fall back to 1
k = int(np.argmin(second_diff)) + 2 if second_diff.size else 1

# plot
fig, ax = plt.subplots()
ax.plot(range(1, len(distortions)+1), distortions)
ax.axvline(k, ls='--', color="red", label="k = "+str(k))
ax.set(title='The Elbow Method', xlabel='Number of clusters',
       ylabel="Distortion")
ax.legend()
ax.grid(True)

plt.show()

########################################## DIVIDE LOCATIONS IN K CLUSTERS ######################################
# Seeded and with an explicit restart count, like the models of the elbow
# search, so that the partition (and the routes built on it) is reproducible.
model = cluster.KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=0)
# Customers (depot excluded) known up to t=3; one combined mask avoids the
# reindexing warning raised by chained boolean filters.
X = dtf.loc[(dtf["base"] == 0) & (dtf["Time Interval"] < 4), ["Lat", "Lng"]]

dtf_X = X.copy()
dtf_X["cluster"] = model.fit_predict(X)

# Assignment aligns on the index: rows that are not in X get NaN as cluster.
dtf["cluster"] = dtf_X["cluster"]
dtf.sample(5)


# plot
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# scatter of the points coloured by cluster (first k colours of the custom palette)
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata,k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k='+str(k)+'): known customers at time t=3')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# depot marker
ax.scatter(start[0], start[1], c='black', marker='^')
plt.show()

############################################ ROUTING WITH NEW CUSTOMERS AT t=3 #######################################
# Solve one single-vehicle routing problem per cluster with OR-Tools.
# These accumulators are shared by all clusters: do not reset them in the loop.
dic_routes_clusters = {}   # cluster label -> route expressed as dtf 'id' values
total_distance = 0
total_load = 0

# Loop-invariant parameters
driver = 1                 # vehicles per cluster
driver_capacity = [100]    # capacity per vehicle (every customer has demand 1)
max_distance = 100000      # upper bound on the cumulated arc cost of one route
# graph node closest to the depot coordinates
start_node = ox.nearest_nodes(G, start[1], start[0])

for clust in range(k):
    # Depot first, then the customers of this cluster, so that matrix
    # position p always corresponds to lst_id_nodes[p].
    depot_rows = dtf[dtf["base"] == 1]
    cluster_rows = dtf[dtf["cluster"] == clust]
    lst_for_dist_matrix = depot_rows["node"].tolist() + cluster_rows["node"].tolist()
    lst_id_nodes = depot_rows["id"].tolist() + cluster_rows["id"].tolist()

    # matrix position -> 'id' in dtf, needed to translate the solver's route
    dict_route_nodes = dict(enumerate(lst_id_nodes))

    # Pairwise cost matrix (shortest_distance yields NaN when it finds no path).
    distance_matrix = np.asarray(
        [[shortest_distance(a, b) for b in lst_for_dist_matrix] for a in lst_for_dist_matrix],
        dtype=float)
    # Casting NaN to int is undefined, so unreachable pairs are priced just
    # above the route limit: the Distance dimension then forbids those arcs.
    distance_matrix = np.where(np.isnan(distance_matrix), max_distance + 1, distance_matrix)
    # int values are needed for the routing model
    distance_matrix = np.rint(distance_matrix).astype(int)

    print("start node:", start_node, "| total locations to visit in the cluster:", len(lst_for_dist_matrix) - 1,
          "| drivers:", driver, "\n")
    demands = [0] + [1] * (len(lst_for_dist_matrix) - 1)

    # Create the routing index manager and the routing model
    manager = pywrapcp.RoutingIndexManager(len(lst_for_dist_matrix), driver, lst_for_dist_matrix.index(start_node))
    routing = pywrapcp.RoutingModel(manager)

    def distance_callback(from_index, to_index):
        """Return the arc cost between two routing indices as a plain int."""
        # Convert from routing variable Index to distance matrix NodeIndex.
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(distance_matrix[from_node][to_node])

    transit_callback_index = routing.RegisterTransitCallback(distance_callback)

    # Define cost of each arc.
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # The constraint about capacity
    def get_demand(from_index):
        """Return the demand of the location behind a routing index."""
        # demands is ordered by matrix position, not by routing index
        return demands[manager.IndexToNode(from_index)]

    demand = routing.RegisterUnaryTransitCallback(get_demand)
    routing.AddDimensionWithVehicleCapacity(demand, slack_max=0,
                                            vehicle_capacities=driver_capacity,
                                            fix_start_cumul_to_zero=True,
                                            name='Capacity')

    # The constraint about distance
    name = 'Distance'
    routing.AddDimension(transit_callback_index, slack_max=0, capacity=max_distance,
                         fix_start_cumul_to_zero=True, name=name)
    distance_dimension = routing.GetDimensionOrDie(name)
    distance_dimension.SetGlobalSpanCostCoefficient(100)

    # First solution by cheapest arc, then improved by guided local search.
    parameters = pywrapcp.DefaultRoutingSearchParameters()
    parameters.first_solution_strategy = (routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)
    parameters.local_search_metaheuristic = (routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH)
    # The search stops after 1 second and returns the best route found so far,
    # which is not necessarily optimal.
    parameters.time_limit.FromSeconds(1)
    solution = routing.SolveWithParameters(parameters)
    if solution is None:
        # No feasible route (e.g. route limit exceeded or unreachable customer):
        # report it and go on with the next cluster instead of crashing.
        print("no solution found for cluster", clust, "\n")
        continue

    # Walk the solution from the start index to the end index.
    index = routing.Start(0)
    route_idx = []
    route_distance = 0
    route_load = 0
    while not routing.IsEnd(index):
        node_index = manager.IndexToNode(index)
        route_idx.append(node_index)
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        route_distance += distance_callback(previous_index, index)
        route_load += demands[node_index]
    # close the tour on the end node
    route_idx.append(manager.IndexToNode(index))
    # route_idx holds matrix positions; my_route is the same route as dtf 'id' values
    my_route = [dict_route_nodes[x] for x in route_idx]
    print(my_route)
    dic_routes_clusters[clust] = my_route
    # NOTE(review): the arc costs come from shortest_distance, whose default
    # edge weight is 'travel_time'; the "/ 1000 ... km" conversion below assumes
    # lengths in metres - confirm the intended unit.
    print(f'distance: {round(route_distance / 1000, 2)} km')
    print(f'load: {round(route_load, 2)}', "\n")
    total_distance += route_distance
    total_load += route_load

print(f'Total distance: {round(total_distance / 1000, 2)} km')
print(f'Total load: {total_load}')

################################################# ROUTING REPRESENTATION FOR ALL CLUSTERS WITH NEW CUSTOMERS AT t=3 #############################################
fig, ax = plt.subplots()
palette_personalizzata = ["#FF5733", "#33FF57", "#3366FF", "#FF33A1", "#33FFFF", "#FF3366", "#FFFF33", "#9933FF", "#FF9933", "#33FF99"]

# Scatter plot for clusters
sns.scatterplot(x="Lat", y="Lng", data=dtf,
                palette=sns.color_palette(palette_personalizzata, k),
                hue='cluster', legend=False, ax=ax).set_title('Clustering(k=' + str(k) + '): routing representation at t=3')

# Add the start point (depot)
ax.scatter(start[0], start[1], c='black', marker='^')

ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')

# Draw every route as a dashed polyline through its stops, in visiting order.
# Only the routes are needed, and the loop variable must not be called k:
# that would overwrite the global number of clusters.
for route_ids in dic_routes_clusters.values():
    route_coordinates = dtf.loc[route_ids, ["Lat", "Lng"]]
    ax.plot(route_coordinates["Lat"], route_coordinates["Lng"], linestyle='--', color='black')

plt.show()
