
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from utils import flatten_categorical_columns,reverse_get_dummies
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans,DBSCAN
from sklearn.metrics import mean_absolute_error, mean_squared_error, silhouette_score,r2_score
from kmodes.kprototypes import KPrototypes
from bad_good_mapper import pair_configs
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor
from MLP_RegressorWrapper import MultiOutputMLPWrapper,mean_mae
from sklearn.model_selection import ParameterGrid
import json
random.seed(42)

# Best MLP model params for class Burr: {'activation': 'relu', 'alpha': 0.001, 'batch_size': 32, 'early_stopping': True, 'hidden_layer_sizes': (32, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 500, 'n_iter_no_change': 20, 'solver': 'adam'},
#  score: 60.7345
#--------------------------------------------
#Best MLP model params for class Burr: {'activation': 'relu', 'alpha': 0.01, 'early_stopping': True, 'hidden_layer_sizes': (64,), 'learning_rate': 'constant', 'learning_rate_init': 0.0005, 'max_iter': 500, 'n_iter_no_change': 10, 'solver': 'lbfgs'}, 
# score: 61.8759
#--------------------------------------------
#Best MLP model params for class Burr: {'activation': 'relu', 'alpha': 0.001, 'batch_size': 16, 'early_stopping': True, 'hidden_layer_sizes': (64,), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 500, 'momentum': 0.6, 'n_iter_no_change': 20, 'nesterovs_momentum': True, 'solver': 'sgd'}, 
# score: 59.9489
#-------------------------------------------------------------------------------------------------------------------------------
#Best MLP model params for class Plasma: {'activation': 'relu', 'alpha': 0.01, 'batch_size': 16, 'early_stopping': True, 'hidden_layer_sizes': (32, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 500, 'n_iter_no_change': 20, 'solver': 'adam'},
# score: 83.6174
#-------------------------------------------------------------------------------------------------------------------------------
#Best MLP model params for class Cutting loss: {'activation': 'relu', 'alpha': 0.001, 'batch_size': 16, 'early_stopping': True, 'hidden_layer_sizes': (32, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.01, 'max_iter': 500, 'n_iter_no_change': 20, 'solver': 'adam'},
# score: 48.1601
#-------------------------------------------------------------------------------------------------------------------------------
#Best MLP model params for class Cutting torn: {'activation': 'relu', 'alpha': 0.01, 'batch_size': 16, 'early_stopping': True, 'hidden_layer_sizes': (32, 32), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_iter': 500, 'n_iter_no_change': 20, 'solver': 'adam'},
# score: 52.2494
# Hyper-parameters shared by every per-defect MLP configuration below; each
# entry only spells out the values that differ from these defaults.
_MLP_DEFAULT_PARAMS = {
    'activation': 'relu',
    'alpha': 0.001,
    'batch_size': 16,
    'early_stopping': True,
    'hidden_layer_sizes': (32, 32),
    'learning_rate': 'constant',
    'learning_rate_init': 0.01,
    'max_iter': 500,
    'n_iter_no_change': 20,
    'solver': 'adam',
}

# MLP hyper-parameters keyed by defect class, plus 'full' for the model that
# is trained on all defect classes together. Every value is an independent
# dict, so mutating one entry never affects another.
# NOTE(review): the grid-search log in the comments above records
# solver='adam' for Plasma, while this table uses 'lbfgs' -- confirm which
# one is intended.
best_params_mlp = {
    'Burr': {**_MLP_DEFAULT_PARAMS, 'batch_size': 32},
    'Plasma': {**_MLP_DEFAULT_PARAMS, 'alpha': 0.01, 'solver': 'lbfgs'},
    'Cutting loss': {**_MLP_DEFAULT_PARAMS},
    'Cutting torn': {**_MLP_DEFAULT_PARAMS, 'alpha': 0.01, 'learning_rate_init': 0.001},
    'full': {**_MLP_DEFAULT_PARAMS, 'batch_size': 32},
}
# Step size each continuous process parameter is snapped to before export:
# a value v becomes round(v / step) * step.
# NOTE(review): the original comment on the thickness entry said "round to
# 1 decimal", but a step of 1 rounds to whole millimetres -- confirm which
# behaviour is intended.
rounding_factors = dict([
    ("THICKNESS_TULUS [mm]", 1),            # multiples of 1
    ("CONTOUR_SPEED [mm/min]", 50),         # multiples of 50
    ("LASER_POWER [W]", 1000),              # multiples of 1000
    ("CONTOUR_GAS_PRESSURE [bar]", 0.5),    # multiples of 0.5
    ("CONTOUR_NOZZLE_DISTANCE [mm]", 0.1),  # multiples of 0.1
    ("CONTOUR_FOCAL [mm]", 0.5),            # multiples of 0.5
])


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Data preparation: load the cleaned dataset, build (bad -> good)
    # configuration pairs per material/thickness group, and turn them into
    # the X/y train and test frames used by the rest of the script.
    # ------------------------------------------------------------------
    cont_features = ['THICKNESS_TULUS [mm]', 'CONTOUR_SPEED [mm/min]', 'LASER_POWER [W]',
                     'CONTOUR_GAS_PRESSURE [bar]', 'CONTOUR_NOZZLE_DISTANCE [mm]', 'CONTOUR_FOCAL [mm]']
    cat_features = ['NOZZLE_TYPE', 'MATERIAL_NAME_TULUS', 'DEFECT_TYPE']

    # Load and shuffle reproducibly.
    final_data2 = (
        pd.read_excel('outlier_detection_output/final_data_cleaned.xlsx')
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )
    print(final_data2.dtypes)
    print(f"final_data2 length: {len(final_data2)}")

    # Rows that agree on everything except the defect label / quality score
    # are treated as duplicates; only the first occurrence is kept.
    dedup_columns = [col for col in final_data2.columns if col not in ['DEFECT_TYPE', 'QUALITY_CUT']]
    final_data2 = final_data2.drop_duplicates(subset=dedup_columns, keep='first').reset_index(drop=True)
    print(f"final_data2 length after cleaning: {len(final_data2)}")
    final_data2 = final_data2[cont_features + cat_features]

    defect = 'Burr'

    # Scaler fitted on the defect-free rows only. It is not referenced again
    # in this section; kept because later code may rely on the name.
    scaler = StandardScaler()
    scaler.fit(final_data2[final_data2['DEFECT_TYPE'] == 'No Defects'][cont_features])
    print(final_data2.columns)
    materials = list(final_data2['MATERIAL_NAME_TULUS'].unique())
    thicknesses = list(final_data2['THICKNESS_TULUS [mm]'].unique())

    columns_order = final_data2.columns
    mapping_bad_to_good_train = []
    mapping_bad_to_good_test = []  # one entry per material/thickness group

    for material in materials:
        data_mat = final_data2[final_data2['MATERIAL_NAME_TULUS'] == material]
        for thickness in thicknesses:
            # Separate good and bad configurations for this material/thickness.
            same_thickness = data_mat['THICKNESS_TULUS [mm]'] == thickness
            is_good = data_mat['DEFECT_TYPE'] == 'No Defects'
            data_good = data_mat[same_thickness & is_good]
            data_bad = data_mat[same_thickness & ~is_good]

            # A group is only usable when it has both kinds of configuration.
            if len(data_good) == 0 or len(data_bad) == 0:
                continue

            # Reproducible shuffle before slicing off the test part.
            data_good = data_good.sample(frac=1, random_state=42)
            data_bad = data_bad.sample(frac=1, random_state=42)

            # Hold out ~10% of the bad configurations (at least one), except
            # when there is a single bad row, which must stay in training.
            if len(data_bad) == 1:
                test_size_bad = 0
            else:
                test_size_bad = max(1, int(len(data_bad) * 0.1))

            # NOTE(review): the good configurations are NOT split -- the same
            # full set is the candidate pool for both train and test.
            good_train = data_good.copy()
            good_test = data_good.copy()

            bad_train = data_bad.iloc[test_size_bad:]
            bad_test = data_bad.iloc[:test_size_bad]

            if len(good_train) > 0 and len(bad_train) > 0:
                mapping_bad_to_good_train.append({
                    "material": material,
                    "thickness": thickness,
                    "bad_configs": bad_train.copy(),
                    "good_configs": good_train.copy()
                })

            if len(good_test) > 0 and len(bad_test) > 0:
                mapping_bad_to_good_test.append({
                    "material": material,
                    "thickness": thickness,
                    "bad_configs": bad_test.copy(),
                    "good_configs": good_test.copy()
                })

    # Summary statistics.
    print(f"Training set: {len(mapping_bad_to_good_train)} combinazioni materiale/spessore")
    print(f"Test set: {len(mapping_bad_to_good_test)} combinazioni materiale/spessore")

    total_train_bad = sum(len(entry["bad_configs"]) for entry in mapping_bad_to_good_train)
    total_train_good = sum(len(entry["good_configs"]) for entry in mapping_bad_to_good_train)
    total_test_bad = sum(len(entry["bad_configs"]) for entry in mapping_bad_to_good_test)
    total_test_good = sum(len(entry["good_configs"]) for entry in mapping_bad_to_good_test)

    print(f"Training: {total_train_bad} bad configs, {total_train_good} good configs")
    print(f"Test: {total_test_bad} bad configs, {total_test_good} good configs")

    # Thickness and material are constant inside a group and DEFECT_TYPE is
    # what separates bad from good, so none of them is used for pairing.
    pairing_cont = [f for f in cont_features if f not in ['THICKNESS_TULUS [mm]']]
    pairing_cat = [f for f in cat_features if f not in ['DEFECT_TYPE', 'MATERIAL_NAME_TULUS']]

    training_pairs = []  # each element is a (bad_config, good_config) pair
    test_pairs = []

    # Train is fully processed (pairing, then fake pairs) before test, so the
    # sequence of calls that may consume the global `random` state is the
    # same as in a straight-line implementation.
    for mapping, pairs_out, label in (
        (mapping_bad_to_good_train, training_pairs, "Training pairs"),
        (mapping_bad_to_good_test, test_pairs, "test_pairs"),
    ):
        for entry in mapping:
            print(f'MATERIAL: {entry["material"]}, THICKNESS: {entry["thickness"]}')
            # pair_configs (project helper) matches each bad configuration
            # with a good one from the same group.
            pairs = pair_configs(entry["bad_configs"], entry["good_configs"],
                                 list(pairing_cont), list(pairing_cat))
            pairs_out.extend(pairs)
            print(f"{label}: {len(pairs)}")

        # Add ~10% identity pairs (good -> same good) so the model also sees
        # inputs that need no correction.
        sampled = random.sample(pairs_out, int(len(pairs_out) * 0.1))
        pairs_out.extend((pair[1], pair[1]) for pair in sampled)

    random.shuffle(training_pairs)
    random.shuffle(test_pairs)

    # Every continuous feature is stored as float64.
    float_dtypes = {feature: 'float64' for feature in cont_features}

    print(f"training_pairs: {len(training_pairs)}")
    X_train = pd.concat([pair[0] for pair in training_pairs], ignore_index=True)
    y_train = pd.concat([pair[1] for pair in training_pairs], ignore_index=True)
    X_train = pd.DataFrame(X_train, columns=columns_order).astype(float_dtypes)
    y_train = pd.DataFrame(y_train.astype(float_dtypes), columns=columns_order).drop(columns=['DEFECT_TYPE'])

    X_test = pd.concat([pair[0] for pair in test_pairs], ignore_index=True)
    y_test = pd.concat([pair[1] for pair in test_pairs], ignore_index=True)
    X_test = pd.DataFrame(X_test, columns=columns_order).astype(float_dtypes)
    y_test = pd.DataFrame(y_test.astype(float_dtypes), columns=columns_order).drop(columns=['DEFECT_TYPE'])

    # Kept aside before the material column is dropped from the targets.
    y_test_materials = y_test['MATERIAL_NAME_TULUS'].copy()
    y_test_thicknesses = y_test['THICKNESS_TULUS [mm]'].copy()

    y_test = y_test.drop(columns=['MATERIAL_NAME_TULUS'])
    y_train = y_train.drop(columns=['MATERIAL_NAME_TULUS'])

    # flatten_categorical_columns is a project helper; presumably it one-hot
    # encodes the listed columns as '<COLUMN>_<value>' -- TODO confirm.
    X_train = flatten_categorical_columns(X_train, cat_features)
    X_test = flatten_categorical_columns(X_test, cat_features)
    y_test = flatten_categorical_columns(y_test, ['NOZZLE_TYPE'])
    y_train = flatten_categorical_columns(y_train, ['NOZZLE_TYPE'])

    # Align the test frames to the training layout. Any encoded column the
    # (small) test split does not contain -- material, nozzle OR defect type
    # -- is added as all zeros, and the column order is forced to match the
    # training frames so the fitted model and the prediction column labels
    # line up. Previously only material/nozzle columns were back-filled for
    # X_test (a missing defect-type column raised KeyError) and y_test was
    # not aligned at all.
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
    y_test = y_test.reindex(columns=y_train.columns, fill_value=0)

    print(f'X_train columns:\n {X_train.columns}')
    print(f'X_test columns:\n {X_test.columns}')
    print(f'y_train columns:\n {y_train.columns}')
    print(f'y_test columns:\n {y_test.columns}')

"""

    param_grid = {
    'hidden_layer_sizes': [
        (32,),(64,),
         (32, 32), 
    ],
    'activation': ['relu'],
    'solver': ['adam','lbfgs'],
    'alpha': [1e-4, 1e-3,1e-2],
    'learning_rate_init': [0.0005, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive'],
    'batch_size': [16, 32],
    'early_stopping': [False], # lo disattivo poichè MLP_Regressor non accetta direttamente un validation_set in input
    'n_iter_no_change': [10, 20],
    'max_iter': [200,500, 1000]
}



    # Setup 5-fold cross validation for MLP
best_score_mlp = -np.inf
best_params_mlp = None
best_model_mlp = None

scaler1=StandardScaler()
for params in ParameterGrid(param_grid):
    print(f"Current MLP params: {params}")
    fold_scores = []
    assert len(X_train) == len(y_train), f"X_train has {len(X_train)} rows, y_train has {len(y_train)} rows!"
    kf = KFold(n_splits=4, shuffle=True, random_state=42)
    # Perform 4-fold cross-validation
    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train)):
        X_train_fold = X_train.iloc[train_idx].copy()
        X_train_fold[cont_features]=scaler1.fit_transform(X_train_fold[cont_features].copy())
        y_train_fold = y_train.iloc[train_idx].copy()
        y_train_fold[cont_features]=scaler1.transform(y_train_fold[cont_features].copy())
        X_val_fold = X_train.iloc[val_idx].copy()
        X_val_fold[cont_features]=scaler1.transform(X_val_fold[cont_features].copy())
        y_val_fold = y_train.iloc[val_idx].copy()
        y_val_fold[cont_features]=scaler1.transform(y_val_fold[cont_features].copy())
        # Train model on this fold
        fold_model = MultiOutputMLPWrapper(**params, random_state=42, validation_fraction=0.0)
        fold_model.fit(X_train_fold, y_train_fold.drop(columns=['THICKNESS_TULUS [mm]']))
        
        # Score on validation set
        fold_score = fold_model.score(X_val_fold, y_val_fold, y_train_fold,scaler1,cont_features)
        fold_scores.append(fold_score)
    
    # Average score across all folds
    avg_score = np.mean(fold_scores)
    print(f"  Average CV MAE score: {-avg_score:.4f}")
    
    # Update best model if we found a better one
    if avg_score > best_score_mlp:
        best_score_mlp = avg_score
        best_params_mlp = params
        print(f"  New best MLP model found! \n params: {best_params_mlp}, score: {-best_score_mlp:.4f}")

print(f"Best MLP model params for class {defect}: {best_params_mlp}, score: {-best_score_mlp:.4f}")
"""
def round_to_steps(frame, factors):
    """Return a copy of *frame* with selected columns snapped to a step size.

    Args:
        frame: DataFrame holding numeric columns.
        factors: mapping ``column name -> step``. Each value ``v`` of a listed
            column becomes ``round(v / step) * step``. Names that are not
            columns of *frame* are ignored.

    Returns:
        A new DataFrame; *frame* itself is left untouched.
    """
    rounded = frame.copy()
    for column, step in factors.items():
        if column in rounded.columns:
            rounded[column] = (rounded[column] / step).round() * step
    return rounded


def one_hot_argmax(frame, columns):
    """Return a copy of *frame* where *columns* form a strict one-hot group.

    For every row the column of *columns* holding the largest value is set to
    1.0 and all the others to 0.0; on ties the first column in *columns* wins.

    Args:
        frame: DataFrame containing every name in *columns*.
        columns: list of column names that make up the one-hot group.

    Returns:
        A new DataFrame; *frame* itself is left untouched. When *columns* is
        empty or *frame* has no rows the copy is returned unchanged.
    """
    result = frame.copy()
    if not columns or result.empty:
        return result
    scores = result[columns].to_numpy(dtype=float)
    snapped = np.zeros_like(scores)
    # argmax returns the first maximal position, which implements the
    # "first column wins" tie rule.
    snapped[np.arange(len(scores)), scores.argmax(axis=1)] = 1.0
    result[columns] = snapped
    return result


best_mlp_params = {}  # not referenced by the code in this section

# Training, evaluation and export depend on the frames built by the data
# preparation step, which only runs when the file is executed as a script;
# guarding this section the same way keeps a plain import from failing on
# undefined names.
if __name__ == "__main__":
    scaler1 = StandardScaler()

    # Train the final model on the full training set. The scaler is fitted on
    # the (bad) input configurations and re-used for the (good) targets so
    # both live in the same scaled space.
    best_mlp_model = MultiOutputMLPWrapper(**best_params_mlp['full'], random_state=42, validation_fraction=0.2)
    X_train[cont_features] = scaler1.fit_transform(X_train[cont_features].copy())
    y_train[cont_features] = scaler1.transform(y_train[cont_features].copy())
    # Thickness is a fixed property of the sheet, not something to predict.
    best_mlp_model.fit(X_train, y_train.drop(columns=['THICKNESS_TULUS [mm]']))

    print("MAE on training set:", mean_mae(y_train, best_mlp_model.predict(X_train), y_train, scaler1, cont_features))

    # The test frames must be scaled BEFORE they are fed to the model or
    # scored; evaluating on the raw frames yields a meaningless error.
    X_test[cont_features] = scaler1.transform(X_test[cont_features].copy())
    y_test[cont_features] = scaler1.transform(y_test[cont_features].copy())
    y_pred = best_mlp_model.predict(X_test)
    print("MAE on test set:", mean_mae(y_test, best_mlp_model.predict(X_test), y_train, scaler1, cont_features))

    # Label the raw predictions with the target columns (everything in y_test
    # except the thickness, which the model does not output).
    predicted_columns = [col for col in y_test.columns
                         if col != 'THICKNESS_TULUS [mm]' and not col.startswith('MATERIAL_NAME_TULUS_')]
    y_pred = pd.DataFrame(y_pred, columns=predicted_columns).reset_index(drop=True)

    # Re-attach the descriptive columns that were excluded from the targets.
    y_test['MATERIAL_NAME_TULUS'] = y_test_materials.copy()
    y_pred['THICKNESS_TULUS [mm]'] = y_test['THICKNESS_TULUS [mm]'].copy()
    y_pred['MATERIAL_NAME_TULUS'] = y_test_materials.copy()
    columns_order = y_test.columns
    y_pred = y_pred[columns_order].copy()

    # Back to physical units.
    y_pred[cont_features] = scaler1.inverse_transform(y_pred[cont_features].copy())
    y_test[cont_features] = scaler1.inverse_transform(y_test[cont_features].copy())

    # Snap the continuous parameters to their configured step sizes.
    y_pred = round_to_steps(y_pred, rounding_factors)
    y_test = round_to_steps(y_test, rounding_factors)

    # Turn the predicted nozzle scores into a strict one-hot encoding.
    nozzle_columns = [col for col in y_pred.columns if col.startswith('NOZZLE_TYPE_')]
    y_pred = one_hot_argmax(y_pred, nozzle_columns)

    # Debug aid (disabled): print every prediction next to its ground truth.
    # for i in range(len(y_pred)):
    #     print(f"Predicted: {y_pred.iloc[i].to_dict()}")
    #     print(f"Actual: {y_test.iloc[i].to_dict()}")
    #     print("\n")

    # Collapse the one-hot groups back into single categorical columns and
    # export the predictions together with the defect type of each input.
    X_test = reverse_get_dummies(X_test, ['NOZZLE_TYPE', 'DEFECT_TYPE'], keep_old_columns=False)
    y_pred = reverse_get_dummies(y_pred, ['NOZZLE_TYPE'], keep_old_columns=False)
    y_pred['DEFECT_TYPE'] = X_test['DEFECT_TYPE'].copy()
    y_pred.to_excel("MLP_Regressor_output/full_predictions.xlsx", index=False)
