from matplotlib import pyplot as plt
from xgboost import XGBRegressor
import random
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans,DBSCAN
from sklearn.metrics import silhouette_score
from kmodes.kprototypes import KPrototypes
from scipy.spatial.distance import pdist
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import seaborn as sns
import category_encoders as ce
from utils import flatten_categorical_columns, reverse_get_dummies
from sklearn.ensemble import IsolationForest
np.set_printoptions(suppress=True)
import matplotlib.patches as mpatches
import time

from sklearn.preprocessing import MinMaxScaler

pd.set_option('display.float_format', '{:.6f}'.format)
"""
def robust_clustering_hdbscan(X, cont_features, cat_cols, min_cluster_size=3):
    import hdbscan
   
    #HDBSCAN con feature engineering per enfatizzare NOZZLE_TYPE.
  
    # Normalizza i dati
    X_scaled = X.copy()
    scaler = StandardScaler()
    X_scaled[cont_features] = scaler.fit_transform(X[cont_features])
    
    # Crea feature sintetiche per amplificare l'effetto di NOZZLE_TYPE
    # One-hot encoding del NOZZLE_TYPE
    X_nozzle = pd.get_dummies(X_scaled['NOZZLE_TYPE'], prefix='NOZZLE_TYPE')
    
    # Amplifica le feature di NOZZLE_TYPE moltiplicandole
    nozzle_weight = 1.0
    for col in X_nozzle.columns:
        X_nozzle[col] = X_nozzle[col] * nozzle_weight
    
    # Combina tutte le feature
    X_combined = pd.concat([X_scaled[cont_features], X_nozzle], axis=1)
    
    # HDBSCAN clustering
    clusterer = hdbscan.HDBSCAN(
        min_cluster_size=min_cluster_size,
        gen_min_span_tree=True,
        prediction_data=True
    )
    labels = clusterer.fit_predict(X_combined)
    
    # Aggiungi etichette al dataframe originale
    clusters = X.copy()
    clusters['cluster'] = labels
    clusters['outlier_score'] = clusterer.outlier_scores_
    outliers = clusters[clusters['cluster'] == -1]
    
    # Analisi dei cluster rispetto a NOZZLE_TYPE
    cluster_nozzle = pd.crosstab(clusters['cluster'], clusters['NOZZLE_TYPE'])
    print(f"Numero totale di cluster: {len(clusters['cluster'].unique()) - (1 if -1 in clusters['cluster'].unique() else 0)}")
    print(f"Numero di outlier: {len(outliers)} ({len(outliers)/len(X)*100:.1f}%)")
    print("\nDistribuzione dei NOZZLE_TYPE nei cluster:")
    print(cluster_nozzle)
    
    return clusters, outliers, clusterer
"""


def find_optimal_k(X, cont_features, cat_cols, max_k=10, threshold=0.1):
    """
    Choose the number of clusters for K-Prototypes from the elbow of the
    cost curve and from the silhouette score.

    For every k in ``2..max_k`` a KPrototypes model is fitted on ``X``; its
    cost and the silhouette score (computed on the flattened/dummy version of
    ``X``) are recorded. The elbow is the point of the cost curve farthest
    from the straight line joining its first and last points. Both curves are
    plotted, and the smaller of the two candidate k values is returned.

    Args:
        X: DataFrame whose columns are the continuous features followed by
            the categorical ones (the categorical positions are derived as
            ``len(cont_features) + i``).
        cont_features: names of the continuous columns.
        cat_cols: names of the categorical columns.
        max_k: largest k to try; capped at ``len(X) - 1``.
        threshold: unused; kept only for backward compatibility.

    Returns:
        int: the selected k. Falls back to 2 when there are fewer than
        3 samples or when no k >= 2 can be evaluated.
    """
    n_samples = len(X)
    # At least 3 samples are needed for a meaningful clustering.
    if n_samples < 3:
        return 2

    # k can never exceed n_samples - 1 (silhouette requirement).
    max_k = min(max_k, n_samples - 1)
    if max_k < 2:
        # No candidate k to evaluate: same fallback as the small-sample case.
        return 2
    ks = range(2, max_k + 1)

    categorical_indices = [len(cont_features) + i for i in range(len(cat_cols))]

    # The dummy-encoded matrix does not depend on k, so build it only once.
    X_dummies = flatten_categorical_columns(X, cat_cols, only_quality_cut=False)
    dummy_values = X_dummies.values

    costs = []
    silhouette_scores = []
    for k in ks:
        kproto = KPrototypes(
            n_clusters=k,
            init='Cao',
            n_init=20,
            random_state=42,
            verbose=0,
            gamma=7.0
        )
        labels = kproto.fit_predict(X.values, categorical=categorical_indices)
        costs.append(kproto.cost_)
        silhouette_scores.append(silhouette_score(dummy_values, labels))

    # Elbow = point of the (index, cost) curve with the largest distance from
    # the chord joining the first and the last point.
    costs_array = np.array(costs)
    n_points = len(costs_array)
    all_coord = np.vstack((range(n_points), costs_array)).T
    first_point = all_coord[0]
    line_vec = all_coord[-1] - first_point
    line_len = np.sqrt(np.sum(line_vec ** 2))

    if line_len == 0:
        # Only one k was evaluated: the chord is degenerate, so that k is
        # the elbow (avoids a 0/0 division).
        elbow_pos = 0
    else:
        line_vec_norm = line_vec / line_len
        vec_from_first = all_coord - first_point
        scalar_product = np.sum(vec_from_first * line_vec_norm, axis=1)
        vec_parallel = np.outer(scalar_product, line_vec_norm)
        dist_to_line = np.sqrt(np.sum((vec_from_first - vec_parallel) ** 2, axis=1))
        elbow_pos = int(np.argmax(dist_to_line))

    optimal_k = elbow_pos + 2  # +2 because the scan starts at k=2

    # Plot both criteria side by side.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    ax1.plot(ks, costs, 'bo-')
    ax1.set_xlabel('Number of Clusters (k)')
    ax1.set_ylabel('Cost')
    ax1.set_title('Elbow Method')
    ax1.axvline(x=optimal_k, color='r', linestyle='--')

    ax2.plot(ks, silhouette_scores, 'go-')
    ax2.set_xlabel('Number of Clusters (k)')
    ax2.set_ylabel('Silhouette Score')
    ax2.set_title('Silhouette Score')

    plt.tight_layout()
    plt.show()

    # Best k according to the silhouette score alone.
    sil_optimal_k = int(np.argmax(silhouette_scores)) + 2

    # Final choice: the more conservative (smaller) of the two candidates.
    final_k = min(optimal_k, sil_optimal_k)
    print(f"Optimal k (elbow): {optimal_k}")
    print(f"Optimal k (silhouette): {sil_optimal_k}")
    print(f"Final optimal k: {final_k}")

    return final_k


def best_stratified_in_cluster_matching(bad_configs, good_configs, cont_cols, cat_cols_onehot):
    """
    Stratified matching of bad configurations to good ones.

    The good configurations are ranked by Euclidean distance from their own
    centroid and split into up to three concentric strata (core, mid, edge).
    Bad configurations are assigned to the strata cyclically (row 0 -> core,
    row 1 -> mid, ...) so that the associations are spread over the cluster,
    and each bad row is paired with the nearest good row of its stratum.

    Rows are addressed by position, so repeated index labels in either frame
    are handled correctly. Missing values are skipped in the distance sums.

    Args:
        bad_configs: DataFrame of defective configurations.
        good_configs: DataFrame of good configurations.
        cont_cols: names of the continuous feature columns.
        cat_cols_onehot: names of the one-hot encoded categorical columns.

    Returns:
        list of ``(bad_row, good_row)`` tuples, each element a one-row
        DataFrame carrying all the columns (and the index label) of the
        original row. Empty when ``good_configs`` has no rows.
    """
    if len(good_configs) == 0:
        return []

    feature_cols = list(cont_cols) + list(cat_cols_onehot)
    good_features = good_configs[feature_cols]
    good_matrix = good_features.to_numpy(dtype=float)
    bad_matrix = bad_configs[feature_cols].to_numpy(dtype=float)

    # Centroid of the good configurations (column-wise mean).
    centroid = good_features.mean(axis=0).to_numpy(dtype=float)

    # Rank the good rows by distance from the centroid; the stable sort keeps
    # the original row order among ties.
    centroid_dist = np.sqrt(np.nansum((good_matrix - centroid) ** 2, axis=1))
    ranked = np.argsort(centroid_dist, kind="stable")

    # Split the ranking into at most 3 strata; the last one takes the remainder.
    n_strata = min(3, len(ranked))
    strata_size = len(ranked) // n_strata
    strata = [
        ranked[i * strata_size:(None if i == n_strata - 1 else (i + 1) * strata_size)]
        for i in range(n_strata)
    ]

    pairs = []
    for bad_pos in range(len(bad_matrix)):
        # Cycle through the strata to vary the associations.
        stratum = strata[bad_pos % n_strata]

        # Nearest good row inside the selected stratum (first one on ties).
        dist = np.sqrt(np.nansum((good_matrix[stratum] - bad_matrix[bad_pos]) ** 2, axis=1))
        good_pos = stratum[int(np.argmin(dist))]

        pairs.append((
            bad_configs.iloc[bad_pos].to_frame().T,
            good_configs.iloc[good_pos].to_frame().T,
        ))

    return pairs


def pair_configs(bad_configs, good_configs, cont_features, cat_cols):
    """
    Pair every bad configuration with its nearest good configuration using a
    Gower-style distance.

    Continuous features are min-max scaled on the union of both frames and
    contribute their absolute difference; every categorical feature present
    in both frames contributes 1 on mismatch and 0 otherwise. The total is
    divided by ``len(cont_features) + len(cat_cols)``.

    Rows are addressed by position, so repeated index labels are handled
    correctly.

    Args:
        bad_configs: DataFrame with the defective configurations for a given
            material and thickness.
        good_configs: DataFrame with the good configurations for the same
            material and thickness.
        cont_features: list of continuous feature names, e.g.
            ["CONTOUR_SPEED [mm/min]", "LASER_POWER [W]", ...].
        cat_cols: list of categorical feature names, e.g. ['NOZZLE_TYPE'].

    Returns:
        list of ``(bad_row, good_row)`` tuples of one-row DataFrames. Empty
        when either input has no rows. A bad row for which no good row has a
        usable distance (all NaN/inf) is skipped.
    """
    if len(bad_configs) == 0 or len(good_configs) == 0:
        return []

    bad_cont = bad_configs[cont_features]
    good_cont = good_configs[cont_features]

    # Fit the scaler on all configurations so both sides share one scale.
    scaler = MinMaxScaler()
    scaler.fit(pd.concat([bad_cont, good_cont], axis=0, ignore_index=True))
    bad_scaled = scaler.transform(bad_cont)
    good_scaled = scaler.transform(good_cont)

    # Only categorical features available on both sides can be compared.
    shared_cat = [
        feat for feat in cat_cols
        if feat in bad_configs.columns and feat in good_configs.columns
    ]
    n_features = len(cont_features) + len(cat_cols)

    pairs = []
    for bad_pos in range(len(bad_configs)):
        # Continuous part: distance from this bad row to every good row.
        dists = np.abs(good_scaled - bad_scaled[bad_pos]).sum(axis=1)

        # Categorical part: +1 for every mismatching feature.
        for feat in shared_cat:
            bad_value = bad_configs[feat].iloc[bad_pos]
            dists = dists + good_configs[feat].ne(bad_value).to_numpy().astype(float)

        dists = dists / n_features

        # Keep the first good row with the smallest usable distance; NaN and
        # +inf never win, exactly like a strict "<" scan starting from +inf.
        usable = dists < np.inf
        if not usable.any():
            continue
        best_pos = int(np.argmin(np.where(usable, dists, np.inf)))

        pairs.append((
            pd.DataFrame(bad_configs.iloc[bad_pos]).T,
            pd.DataFrame(good_configs.iloc[best_pos]).T
        ))

    return pairs



def pair_configs_clustered(bad_configs, good_configs, cont_features,cat_cols):
    """
    Pair bad configurations with good ones inside K-Prototypes clusters.

    The good configurations are standardized and clustered (one cluster per
    one-hot nozzle column, capped at the number of good rows); every bad
    configuration is assigned to one of those clusters and matched with a
    good configuration of the same cluster through
    ``best_stratified_in_cluster_matching``. With a single good configuration
    clustering is skipped and every bad row is paired with it. The continuous
    features of the returned rows are transformed back to the original scale.

    Args:
        bad_configs: DataFrame of defective configurations (one-hot encoded).
        good_configs: DataFrame of good configurations (one-hot encoded).
        cont_features: continuous feature names; 'THICKNESS_TULUS [mm]' is
            excluded from scaling/clustering if present.
        cat_cols: categorical feature names rebuilt from the one-hot columns
            by ``reverse_get_dummies``.

    Returns:
        list of ``(bad_row, good_row)`` tuples of one-row DataFrames with
        de-normalized continuous features. Empty when either input is empty.
    """
    # Nothing to pair (same convention as pair_configs).
    if len(bad_configs) == 0 or len(good_configs) == 0:
        return []

    thickness_col = 'THICKNESS_TULUS [mm]'
    # Thickness is kept as a plain column and must not be scaled or clustered;
    # it is re-added explicitly to the selected columns below.
    cont_features = [f for f in cont_features if f != thickness_col]
    old_nozzles = [col for col in good_configs.columns if col.startswith('NOZZLE_TYPE_')]
    old_materials = [col for col in good_configs.columns if col.startswith('MATERIAL_NAME_TULUS_')]

    scaler = StandardScaler()
    # K-Prototypes handles categorical columns natively, so rebuild them from
    # the one-hot encoding (the one-hot columns are kept for the matching step).
    good_configs = reverse_get_dummies(good_configs, cat_cols, keep_old_features=True)
    bad_configs = reverse_get_dummies(bad_configs, cat_cols, keep_old_features=True)
    print(f"good_configs shape: {good_configs.shape}, bad_configs shape: {bad_configs.shape}")
    good_configs[cont_features] = scaler.fit_transform(good_configs[cont_features])
    bad_configs[cont_features] = scaler.transform(bad_configs[cont_features])

    kept_cols = cont_features + cat_cols + old_nozzles + [thickness_col] + old_materials + ['DEFECT_TYPE']
    good_configs = good_configs[kept_cols].copy()
    bad_configs = bad_configs[kept_cols].copy()

    final_pairs = []
    if len(good_configs) > 1:
        # One cluster per nozzle type, but never more clusters than good rows
        # and never fewer than one.
        n_clusters = max(1, min(len(old_nozzles), len(good_configs)))
        kproto = KPrototypes(n_init=20, n_clusters=n_clusters, init='Cao', verbose=0, gamma=7.0, random_state=42)

        cluster_cols = cont_features + cat_cols
        categorical_idx = [len(cont_features) + i for i in range(len(cat_cols))]
        good_labels = kproto.fit_predict(good_configs[cluster_cols].values, categorical=categorical_idx)
        bad_labels = kproto.predict(bad_configs[cluster_cols].values, categorical=categorical_idx)

        good_configs['cluster'] = good_labels
        bad_configs['cluster'] = bad_labels

        for i in bad_configs['cluster'].unique():
            cluster_bad = bad_configs[bad_configs['cluster'] == i]
            cluster_good = good_configs[good_configs['cluster'] == i]
            print(f"Cluster {i} - Bad configs: {len(cluster_bad)}, Good configs: {len(cluster_good)}")

            pairs = best_stratified_in_cluster_matching(
                cluster_bad.drop(columns=['cluster']),
                cluster_good.drop(columns=['cluster']),
                cont_features,
                old_nozzles,
            )
            final_pairs.extend(pairs)
    else:
        # A single good configuration: every bad row is paired with it. Each
        # pair gets its own one-row frames because they are modified in place
        # by the de-normalization below.
        final_pairs = [
            (bad_configs.iloc[pos].to_frame().T, good_configs.iloc[0].to_frame().T)
            for pos in range(len(bad_configs))
        ]

    # Bring the continuous features back to their original scale.
    denorm_final_pairs = []
    for bad_conf, good_conf in final_pairs:
        bad_conf[cont_features] = scaler.inverse_transform(bad_conf[cont_features])
        good_conf[cont_features] = scaler.inverse_transform(good_conf[cont_features])
        denorm_final_pairs.append((bad_conf, good_conf))
    return denorm_final_pairs

 





def kproto_clustering():
    """
    Run K-Prototypes on the good configurations of every (material, thickness)
    group and print the resulting cluster assignments.

    The cleaned dataset is read from
    'outlier_detection_output/final_data_cleaned.xlsx'. For each material and
    thickness, rows with DEFECT_TYPE 'No Defects' (good) are standardized and
    clustered; rows with DEFECT_TYPE 'Burr' (bad) are scaled with the same
    scaler and assigned to those clusters. Centroids, per-row assignments and
    a gamma value derived from the mean pairwise distance are printed.

    NOTE(review): KPrototypes is created without random_state here, so the
    printed clusters may differ between runs — confirm whether that is wanted.
    """
    cont_features = ["THICKNESS_TULUS [mm]", "CONTOUR_SPEED [mm/min]", "LASER_POWER [W]",
                            "CONTOUR_GAS_PRESSURE [bar]", 'CONTOUR_NOZZLE_DISTANCE [mm]', "CONTOUR_FOCAL [mm]"]
    final_data=pd.read_excel('outlier_detection_output/final_data_cleaned.xlsx')

    print(f"final_data2 length: {len(final_data)}")
    # Rows are duplicates when they agree on every column except
    # DEFECT_TYPE, QUALITY_CUT and origin; the first occurrence is kept.
    final_data=final_data.drop_duplicates(subset=[col for col in final_data.columns if col not in ['DEFECT_TYPE', 'QUALITY_CUT','origin']], keep='first', inplace=False).reset_index(drop=True)
    print(f"final_data2 length after cleaning: {len(final_data)}")
    # NOTE(review): final_data_copy is not used in the code visible here.
    final_data_copy=final_data.copy()

    # material -> sorted list of the distinct non-null thicknesses seen for it
    materials_thick = {
        material: sorted(
            final_data.loc[
                final_data["MATERIAL_NAME_TULUS"] == material, 
                "THICKNESS_TULUS [mm]"
            ].dropna().unique().tolist()
        )
        for material in final_data["MATERIAL_NAME_TULUS"].unique()
    }
    defect='Burr'
    cat_cols = ['NOZZLE_TYPE']
    final_data=final_data[cont_features+['NOZZLE_TYPE','MATERIAL_NAME_TULUS','DEFECT_TYPE']].copy()
    # Thickness is constant inside each group, so it is left out of the
    # features that are scaled and clustered.
    new_cont_features=[col for col in cont_features  if col!= 'THICKNESS_TULUS [mm]']

    for material in materials_thick.keys():
        for thickness in materials_thick[material]:    
            
            # Bad = rows with the selected defect, good = rows with 'No Defects',
            # both restricted to the current material and thickness.
            final_data2_bad=final_data[(final_data["DEFECT_TYPE"]==defect) & (final_data['MATERIAL_NAME_TULUS']==material) & (final_data['THICKNESS_TULUS [mm]']==thickness)].copy()
            final_data2_bad=final_data2_bad.drop(columns=['DEFECT_TYPE','MATERIAL_NAME_TULUS','THICKNESS_TULUS [mm]'],axis=1)
            final_data2_good=final_data[(final_data["DEFECT_TYPE"]=='No Defects') & (final_data['MATERIAL_NAME_TULUS']==material) & (final_data['THICKNESS_TULUS [mm]']==thickness)].copy()
            final_data2_good=final_data2_good.drop(columns=['DEFECT_TYPE','MATERIAL_NAME_TULUS','THICKNESS_TULUS [mm]'],axis=1)
            # Requested clusters: number of distinct nozzle types among the good rows, plus 2.
            n_clusters=len(final_data2_good["NOZZLE_TYPE"].unique())+2
            
    
            # Groups lacking either good or bad rows are skipped.
            if final_data2_good.shape[0] > 0 and final_data2_bad.shape[0] > 0:
                # The scaler is fitted on the good rows only and reused for the bad rows.
                scaler = StandardScaler()
                final_data2_good[new_cont_features] = scaler.fit_transform(final_data2_good[new_cont_features])
                final_data2_bad[new_cont_features] = scaler.transform(final_data2_bad[new_cont_features])
                # Never ask for more clusters than there are good rows.
                if final_data2_good.shape[0] < n_clusters:
                    kproto = KPrototypes(n_init=1 ,n_clusters=len(final_data2_good), init='Cao', verbose=0, gamma=7.0)
                else:
                    kproto = KPrototypes(n_init=1 , n_clusters=n_clusters, init='Cao', verbose=0, gamma=7.0)
                # Fit the model on the good rows, then assign the bad rows to the
                # fitted clusters; the categorical columns follow the continuous ones.
                
                clusters = kproto.fit_predict(final_data2_good.copy(), categorical=[len(new_cont_features) + i for i in range(len(cat_cols))])
                bad_clusters = kproto.predict(final_data2_bad.copy(), categorical=[len(new_cont_features) + i for i in range(len(cat_cols))])
                final_data2_good['cluster'] = clusters
                final_data2_bad['cluster'] = bad_clusters
                print(f'Clusters desired split: {final_data2_good["NOZZLE_TYPE"].unique()}')
                print(f" \n MATERIAL_NAME_TULUS : {material} \n THICKNESS_TULUS [mm] : {thickness} \n total_good_samples : {final_data2_good.shape[0]}\n total_bad_samples : {final_data2_bad.shape[0]} Clusters: {clusters}")
                print("\nCentroidi categorici:")
                print(f"{kproto.cluster_centroids_}\n")
                print(f"good_clusters: \n")
                # The continuous values printed below are the standardized ones.
                for row in final_data2_good.iterrows():
                    print(f"Cluster: {row[1]['cluster']} , THICKNESS_TULUS [mm]: {thickness}, MATERIAL_NAME_TULUS: {material}, NOZZLE_TYPE: {row[1]['NOZZLE_TYPE']}, CONTOUR_SPEED: {row[1]['CONTOUR_SPEED [mm/min]']}, LASER_POWER: {row[1]['LASER_POWER [W]']}, CONTOUR_GAS_PRESSURE: {row[1]['CONTOUR_GAS_PRESSURE [bar]']}, CONTOUR_NOZZLE_DISTANCE: {row[1]['CONTOUR_NOZZLE_DISTANCE [mm]']}, CONTOUR_FOCAL: {row[1]['CONTOUR_FOCAL [mm]']}")
                print(f"\n\n\n")
                print(f"bad_clusters: \n")
                for row in final_data2_bad.iterrows():
                    print(f"Cluster: {row[1]['cluster']} , THICKNESS_TULUS [mm]: {thickness}, MATERIAL_NAME_TULUS: {material}, NOZZLE_TYPE: {row[1]['NOZZLE_TYPE']}, CONTOUR_SPEED: {row[1]['CONTOUR_SPEED [mm/min]']}, LASER_POWER: {row[1]['LASER_POWER [W]']}, CONTOUR_GAS_PRESSURE: {row[1]['CONTOUR_GAS_PRESSURE [bar]']}, CONTOUR_NOZZLE_DISTANCE: {row[1]['CONTOUR_NOZZLE_DISTANCE [mm]']}, CONTOUR_FOCAL: {row[1]['CONTOUR_FOCAL [mm]']}")
                # Compute a gamma (the weight governing how much the categorical
                # features influence the cluster assignment) as the mean pairwise
                # Euclidean distance of the scaled good rows divided by the number
                # of categorical columns. It is only printed: the model above was
                # fitted with the fixed gamma=7.0.
                X_num = final_data2_good[new_cont_features].values
                distances = pdist(X_num, metric='euclidean')
                mean_dist = distances.mean()
                n_cat = len(cat_cols)  # 1 in this case
                gamma = mean_dist / n_cat
                print(f"Gamma calcolato (default interno): {gamma}\n\n")