import random
from re import X
from typing import final
import pandas as pd
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.mixture import GaussianMixture
import numpy as np
from sklearn.model_selection import ParameterGrid, cross_val_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV,HalvingGridSearchCV,StratifiedKFold
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
from catboost import CatBoostClassifier
from utils import flatten_categorical_columns, reverse_get_dummies
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
np.set_printoptions(suppress=True)


def split_data(df, only_quality_cut=True, validation=False, random_state=None):
    """Split ``df`` into stratified train/test partitions.

    The target column is label-encoded and used both as ``y`` and as the
    stratification key. ``df`` itself is passed as the feature matrix, so the
    returned ``X_train``/``X_test`` still contain the target column.

    Args:
        df: DataFrame holding a ``QUALITY_CUT`` and/or ``DEFECT_TYPE`` column.
        only_quality_cut: If True the target is ``QUALITY_CUT``, otherwise
            ``DEFECT_TYPE``.
        validation: If True hold out 10% of the rows, otherwise 20%.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` keeps the previous unseeded behaviour.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``; the ``y`` values are the
        integer-encoded labels.
    """
    target_column = 'QUALITY_CUT' if only_quality_cut else 'DEFECT_TYPE'
    y = LabelEncoder().fit_transform(df[target_column])

    test_size = 0.1 if validation else 0.2
    X_train, X_test, y_train, y_test = train_test_split(
        df,
        y,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )
    return X_train, X_test, y_train, y_test


def apply_KNN(X_train, X_test, y_train, y_test):
    """Fit a k-nearest-neighbours classifier and print train/test reports.

    The continuous features are standardised with a ``StandardScaler`` fitted
    on the training rows. Scaling is done on copies, so the DataFrames passed
    in by the caller are left untouched.

    Args:
        X_train: Training features; must contain the continuous feature
            columns listed below.
        X_test: Test features; reordered to the training column order.
        y_train: Training labels; the label encoder is fitted on these.
        y_test: Test labels; encoded with the encoder fitted on ``y_train``.

    Returns:
        None. The predictions and both classification reports are printed.
    """
    continous_features = [
        "THICKNESS_TULUS [mm]",
        "CONTOUR_SPEED [mm/min]",
        "LASER_POWER [W]",
        "CONTOUR_GAS_PRESSURE [bar]",
        "CONTOUR_NOZZLE_DISTANCE [mm]",
        "CONTOUR_FOCAL [mm]",
    ]

    # Copy first: scaling the caller's frames in place would leak into every
    # model evaluated afterwards on the same data.
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    scaler = StandardScaler()  # or MinMaxScaler()
    X_train_scaled[continous_features] = scaler.fit_transform(X_train_scaled[continous_features])
    X_test_scaled[continous_features] = scaler.transform(X_test_scaled[continous_features])
    X_test_scaled = X_test_scaled[X_train_scaled.columns]

    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    print("Classi trovate:", le.classes_)
    print("Mapping testo → numero:")
    for i, label in enumerate(le.classes_):
        print(f"{label} → {i}")

    # Evaluate with the tuned hyper-parameters.
    # NOTE(review): p presumably has no effect with metric='manhattan' — confirm.
    knn = KNeighborsClassifier(
        algorithm='auto',
        leaf_size=30,
        metric='manhattan',
        metric_params=None,
        n_neighbors=9,
        p=2,
        weights='uniform',
    )
    knn.fit(X_train_scaled, y_train)
    train_predictions = knn.predict(X_train_scaled)
    y_pred = knn.predict(X_test_scaled)
    print(y_pred)

    report = classification_report(y_test, y_pred, zero_division=0.0)
    train_report = classification_report(y_train, train_predictions, zero_division=0.0)

    # Print the classification reports.
    print("KNN Classifier")
    print("Train Classification Report:")
    print(train_report)
    print("Test Classification Report:")
    print(report)



def mlp_classifier(X_train, X_test, y_train, y_test):
    """Fit an ``MLPClassifier`` with fixed hyper-parameters and print reports.

    The continuous features are standardised with a ``StandardScaler`` fitted
    on the training rows. Scaling is done on copies, so the DataFrames passed
    in by the caller are left untouched.

    Args:
        X_train: Training features; must contain the continuous feature
            columns listed below.
        X_test: Test features; reordered to the training column order.
        y_train: Training labels; the label encoder is fitted on these.
        y_test: Test labels; encoded with the encoder fitted on ``y_train``.

    Returns:
        None. The predictions and both classification reports are printed.
    """
    # Best parameters found by a previous search.
    best_params = {
        'activation': 'relu',
        'alpha': 0.001,
        'batch_size': 64,
        'early_stopping': True,
        'hidden_layer_sizes': (50, 30),
        'learning_rate': 'constant',
        'learning_rate_init': 0.005,
        'max_iter': 300,
        'solver': 'adam',
        'validation_fraction': 0.2,
    }
    continous_features = [
        "THICKNESS_TULUS [mm]",
        "CONTOUR_SPEED [mm/min]",
        "LASER_POWER [W]",
        "CONTOUR_GAS_PRESSURE [bar]",
        "CONTOUR_NOZZLE_DISTANCE [mm]",
        "CONTOUR_FOCAL [mm]",
    ]

    # Copy first: scaling the caller's frames in place would leak into every
    # model evaluated afterwards on the same data.
    X_train_scaled = X_train.copy()
    X_test_scaled = X_test.copy()

    scaler = StandardScaler()  # or MinMaxScaler()
    X_train_scaled[continous_features] = scaler.fit_transform(X_train_scaled[continous_features])
    X_test_scaled[continous_features] = scaler.transform(X_test_scaled[continous_features])
    X_test_scaled = X_test_scaled[X_train_scaled.columns]

    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)
    print("Classi trovate:", le.classes_)
    print("Mapping testo → numero:")
    for i, label in enumerate(le.classes_):
        print(f"{label} → {i}")

    # Evaluate with the tuned hyper-parameters.
    best_mlp = MLPClassifier(**best_params, random_state=42)
    best_mlp.fit(X_train_scaled, y_train)
    train_predictions = best_mlp.predict(X_train_scaled)
    y_pred = best_mlp.predict(X_test_scaled)
    print(y_pred)

    report = classification_report(y_test, y_pred, zero_division=0.0)
    train_report = classification_report(y_train, train_predictions, zero_division=0.0)

    # Print the classification reports.
    print("MLP Classifier")
    print("Train Classification Report:")
    print(train_report)
    print("Test Classification Report:")
    print(report)

def catboost_classifier(input_dir):
    """Train a CatBoost classifier on the cleaned data and score a prediction file.

    The training set is read from
    ``outlier_detection_output/final_data_cleaned.xlsx`` and shuffled with a
    fixed seed. Every row of the file at ``input_dir`` is labelled
    ``'No Defects'`` as its expected class, so the test report shows how many
    of those rows the model classifies as defect-free.

    Args:
        input_dir: Path of the Excel file holding the rows to evaluate. It
            must contain every feature column of the training data; a
            ``DEFECT_TYPE`` column is optional and ignored.

    Returns:
        None. The test predictions and both classification reports are printed.
    """
    X_test = pd.read_excel(input_dir)
    final_data2 = pd.read_excel('outlier_detection_output/final_data_cleaned.xlsx').sample(frac=1, random_state=42)

    y_train = final_data2['DEFECT_TYPE'].copy()
    X_train = final_data2.drop(columns=["DEFECT_TYPE"])

    # Align on the feature columns only, so a prediction file without a
    # DEFECT_TYPE column does not raise a KeyError.
    X_test = X_test[X_train.columns].copy()
    # Dummy ground truth: every evaluated row is expected to be defect-free.
    y_test = pd.Series(['No Defects'] * len(X_test), name='DEFECT_TYPE')

    cat_col_indices = ['NOZZLE_TYPE', 'MATERIAL_NAME_TULUS']
    model = CatBoostClassifier(
        min_data_in_leaf=10,
        learning_rate=0.05,
        l2_leaf_reg=5,
        iterations=500,
        grow_policy='Depthwise',
        depth=8,
        bootstrap_type='Bernoulli',
        random_state=42,
        verbose=0,
        eval_fraction=0.2,
    )
    model.fit(X_train, y_train, cat_features=cat_col_indices, verbose=0)

    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    print(y_pred_test)

    print("CatBoost Classifier")
    print("Train Classification Report:")
    print(classification_report(y_train, y_pred_train, zero_division=0.0))
    print("Test Classification Report:")
    print(classification_report(y_test, y_pred_test, zero_division=0.0))
    

def apply_random_forest(X_train, X_test, y_train, y_test):
    """Fit a random forest with fixed hyper-parameters and print reports.

    The features are used exactly as passed in; no scaling or encoding is
    applied here.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        None. The test predictions and both classification reports are printed.
    """
    rf_model = RandomForestClassifier(
        random_state=42,
        bootstrap=True,
        ccp_alpha=0.0,
        class_weight=None,
        criterion='entropy',
        max_depth=40,
        max_features='sqrt',
        max_leaf_nodes=None,
        min_impurity_decrease=0.0,
        min_samples_leaf=2,
        min_samples_split=5,
        n_estimators=50,
    )
    rf_model.fit(X_train, y_train)

    y_pred_train = rf_model.predict(X_train)
    y_pred_test = rf_model.predict(X_test)
    print(y_pred_test)

    print("Random Forest Classifier")
    print("Train Classification Report:")
    print(classification_report(y_train, y_pred_train, zero_division=0.0))
    print("Test Classification Report:")
    print(classification_report(y_test, y_pred_test, zero_division=0.0))


def mean_mae_evaluation(input_dir):
    """Compare predicted settings with the closest known defect-free configuration.

    For each row of the prediction file, the defect-free rows of the cleaned
    data with the same material, thickness and nozzle are selected. Distances
    are computed on features standardised with the mean and standard deviation
    of the matching (material, thickness) group. The nearest row is the one
    with the smallest mean absolute standardised difference. The per-feature
    absolute error against that row is recorded in the original units.

    Args:
        input_dir: Path of the Excel file holding the predicted configurations.

    Returns:
        Tuple ``(mae_means, no_match_count, total_predictions)``.
        ``mae_means`` maps each continuous feature to its mean absolute error,
        or NaN if no prediction could be matched. ``no_match_count`` is the
        number of predictions with no defect-free row sharing material,
        thickness and nozzle. ``total_predictions`` is the number of rows in
        the prediction file.
    """
    # NOTE(review): the directory is capitalised here but lowercase in the
    # other readers of this file; on a case-sensitive filesystem only one
    # spelling can work — confirm which one is right.
    all_good_configs = pd.read_excel("Outlier_detection_output/final_data_cleaned.xlsx")
    predictions = pd.read_excel(input_dir)
    all_good_configs = all_good_configs[all_good_configs["DEFECT_TYPE"] == "No Defects"].reset_index(drop=True)
    predictions = predictions.drop(columns=["DEFECT_TYPE"], errors='ignore').reset_index(drop=True)

    # Statistics are grouped by material and thickness only (not nozzle).
    grouping_cols_stats = ['MATERIAL_NAME_TULUS', 'THICKNESS_TULUS [mm]']

    # Continuous features to evaluate.
    continuous_feats = [
        'CONTOUR_SPEED [mm/min]',
        'LASER_POWER [W]',
        'CONTOUR_GAS_PRESSURE [bar]',
        'CONTOUR_NOZZLE_DISTANCE [mm]',
        'CONTOUR_FOCAL [mm]'
    ]

    # Precompute mean and std per material+thickness for the continuous features.
    stats = all_good_configs.groupby(grouping_cols_stats)[continuous_feats].agg(['mean', 'std'])
    stats.columns = ['_'.join(col).strip() for col in stats.columns.values]
    mean_cols = [f'{feat}_mean' for feat in continuous_feats]
    std_cols = [f'{feat}_std' for feat in continuous_feats]

    mae_list = {feat: [] for feat in continuous_feats}
    no_match_count = 0  # predictions with no matching defect-free configuration
    total_predictions = len(predictions)

    for idx, pred_row in predictions.iterrows():
        mat = pred_row['MATERIAL_NAME_TULUS']
        thick = pred_row['THICKNESS_TULUS [mm]']
        nozzle = pred_row['NOZZLE_TYPE']

        # Defect-free configurations with the same material, thickness and nozzle.
        subset_good = all_good_configs[
            (all_good_configs['MATERIAL_NAME_TULUS'] == mat) &
            (all_good_configs['THICKNESS_TULUS [mm]'] == thick) &
            (all_good_configs['NOZZLE_TYPE'] == nozzle)
        ]

        if subset_good.empty:
            no_match_count += 1
            continue

        try:
            group_stats = stats.loc[(mat, thick)]
        except KeyError:
            continue
        mean_vals = group_stats[mean_cols].values
        std_vals = group_stats[std_cols].values

        # A constant feature has zero spread; divide by 1 instead of 0.
        std_vals = np.where(std_vals == 0, 1, std_vals)

        pred_feats = pred_row[continuous_feats].values.astype(float)
        good_feats = subset_good[continuous_feats].values.astype(float)

        pred_norm = (pred_feats - mean_vals) / std_vals
        good_norm = (good_feats - mean_vals) / std_vals

        # Nearest good configuration in standardised space; the error is
        # reported in the original units.
        mae_distances = np.mean(np.abs(good_norm - pred_norm), axis=1)
        min_idx = np.argmin(mae_distances)
        best_match_orig = good_feats[min_idx]
        mae_feat = np.abs(pred_feats - best_match_orig)

        for i, feat in enumerate(continuous_feats):
            mae_list[feat].append(mae_feat[i])
    print(mae_list)
    mae_means = {feat: np.mean(vals) if vals else np.nan for feat, vals in mae_list.items()}

    print(f"\nTotale predizioni: {total_predictions}")
    print("MAE per feature:")
    for feat, mae in mae_means.items():
        print(f"{feat}: {mae:.4f}")

    # An empty prediction file would otherwise raise ZeroDivisionError here.
    mismatch_rate = no_match_count / total_predictions if total_predictions else 0.0
    print(f"Nessuna good config trovata con stesso nozzle: {no_match_count}")
    print(f"Frequenza mismatch nozzle: {mismatch_rate:.2%}")

    return mae_means, no_match_count, total_predictions



def evaluate_model(input_dir):
    """Score a prediction file with every classifier defined in this module.

    The cleaned training data is shuffled, de-duplicated on its feature
    columns and passed through ``flatten_categorical_columns``, as is the
    prediction file. The prediction frame is then given the training column
    layout (columns it lacks are filled with 0) and every row is labelled
    ``'No Defects'`` as its expected class.

    Args:
        input_dir: Path of the Excel file holding the rows to evaluate.

    Returns:
        None. All results are printed by the individual classifiers.
    """
    target = "DEFECT_TYPE"
    categorical = ["NOZZLE_TYPE", "MATERIAL_NAME_TULUS"]

    X_test = pd.read_excel(input_dir)
    final_data2 = pd.read_excel('outlier_detection_output/final_data_cleaned.xlsx').sample(frac=1, random_state=42)
    print(f"final_data2 length: {len(final_data2)}")

    # Rows that differ only in the target count as duplicates.
    feature_columns = [name for name in final_data2.columns if name != target]
    final_data2 = final_data2.drop_duplicates(subset=feature_columns, keep='first').reset_index(drop=True)

    final_data2 = flatten_categorical_columns(final_data2, categorical, only_quality_cut=False, drop_duplicates=False)
    X_test = flatten_categorical_columns(X_test, categorical, only_quality_cut=False, drop_duplicates=False)
    print("X_test length after flattening: ", len(X_test))
    print(f"final_data2 length after cleaning: {len(final_data2)}")
    print(final_data2.columns)

    # Give the test frame the training layout; absent columns become 0.
    absent = [name for name in final_data2.columns if name not in X_test.columns]
    X_test[absent] = 0
    X_test = X_test[final_data2.columns].drop(columns=[target])

    # Every evaluated row is expected to be defect-free.
    X_test[target] = 'No Defects'
    y_test = X_test[target].copy()
    y_train = final_data2[target].copy()
    print(f'Train classes: \n {y_train.value_counts()}')
    print(f'\n Test classes: \n {y_test.value_counts()}')

    X_train = final_data2.drop(columns=[target])
    X_test = X_test.drop(columns=[target])

    mlp_classifier(X_train, X_test, y_train, y_test)
    catboost_classifier(input_dir=input_dir)
    apply_random_forest(X_train, X_test, y_train, y_test)
    apply_KNN(X_train, X_test, y_train, y_test)
    #apply_SVM(X_train, X_test, y_train, y_test)


# Candidate prediction files; only input_dir2 is passed to the calls below.
input_dir='catboost2_output/Original_predictions.xlsx'

input_dir2='MLP_Regressor_output/full_predictions.xlsx'

# These statements have no __main__ guard, so they run whenever the module is
# executed or imported: first the classifier evaluation, then the MAE
# comparison, both on the same prediction file.
evaluate_model(input_dir=input_dir2)
print("\n\n")
print("Starting mean MAE evaluation...")
mean_mae_evaluation(input_dir=input_dir2)
print("Evaluation completed.")