import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.mixture import GaussianMixture
import numpy as np
from sklearn.model_selection import ParameterGrid, cross_val_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV,HalvingGridSearchCV,StratifiedKFold,RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
from utils import flatten_categorical_columns, reverse_get_dummies
from catboost import CatBoostClassifier



def split_data(df, only_quality_cut=True, validation=False, random_state=None):
    """Split ``df`` into stratified train/test partitions.

    The target is read from the ``QUALITY_CUT`` column when
    ``only_quality_cut`` is true, otherwise from ``DEFECT_TYPE``, and is
    integer-encoded with a ``LabelEncoder``. The whole ``df`` is passed to
    ``train_test_split``, so the returned feature frames still contain the
    target column; callers must drop it themselves before fitting a model.

    Args:
        df: DataFrame holding the features and the target column.
        only_quality_cut: Select ``QUALITY_CUT`` (True) or ``DEFECT_TYPE``
            (False) as the target column.
        validation: When true, hold out 10% of the rows; otherwise 20%.
        random_state: Optional seed forwarded to ``train_test_split`` so the
            split can be reproduced. ``None`` (the default) keeps the
            previous unseeded behaviour.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` where the ``X_*``
        entries are row subsets of ``df`` and the ``y_*`` entries are the
        matching integer-encoded labels.

    Raises:
        KeyError: If the selected target column is not present in ``df``.
    """
    target_column = 'QUALITY_CUT' if only_quality_cut else 'DEFECT_TYPE'
    y = LabelEncoder().fit_transform(df[target_column])

    # The two modes differ only in the size of the held-out fraction.
    test_size = 0.1 if validation else 0.2
    X_train, X_test, y_train, y_test = train_test_split(
        df,
        y,
        test_size=test_size,
        stratify=y,
        random_state=random_state,
    )
    return X_train, X_test, y_train, y_test


def apply_SVM(X_train, X_test, y_train, y_test):
    """Grid-search ``SVC`` hyper-parameters for the linear and poly kernels.

    The continuous process features are standardised with a ``StandardScaler``
    fitted on ``X_train``; the remaining columns are passed through
    unchanged. For each kernel a ``GridSearchCV`` (5-fold stratified,
    shuffled, seed 42, ``f1_weighted`` scoring) is fitted on the scaled
    training data and the winning parameter set is printed.

    Args:
        X_train: Training feature DataFrame. Must contain every column named
            in ``continous_features`` below.
        X_test: Test feature DataFrame. Only its length is reported; no model
            is evaluated on it here.
        y_train: Training labels aligned with ``X_train``.
        y_test: Unused; kept so the signature stays compatible with callers.

    Returns:
        dict mapping each kernel name (``"linear"``, ``"poly"``) to the
        ``best_params_`` dict found by its grid search.
    """
    kernels = ["linear", "poly"]
    param_grid = {
        "linear": {
            'C': [0.01, 0.1, 0.5, 1, 2, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        },
        "poly": {
            'C': [0.01, 0.05, 0.1, 0.5, 1, 10],
            'degree': [2, 3, 4, 5, 6],
            'gamma': ['scale', 'auto', 0.01, 0.001, 0.1],
            'coef0': [0, 1, 10],
            'class_weight': [None, 'balanced'],
        },
    }
    continous_features = [
        "THICKNESS_TULUS [mm]",
        "CONTOUR_SPEED [mm/min]",
        "LASER_POWER [W]",
        "CONTOUR_GAS_PRESSURE [bar]",
        "CONTOUR_NOZZLE_DISTANCE [mm]",
        "CONTOUR_FOCAL [mm]",
    ]
    print(f"\nNumber of training samples:{len(X_train)}")
    print(f"Number of test samples:{len(X_test)}")
    print("SVM Mean Class Accuracy: \n")

    # Scaling does not depend on the kernel, so it is done once up front
    # instead of being repeated on every loop iteration.
    # NOTE: the scaler is fitted on the full training set before the CV folds
    # are drawn, so each validation fold contributes to the scaling statistics.
    scaler = StandardScaler()  # MinMaxScaler() is the alternative
    scaled_continuous_train = pd.DataFrame(
        scaler.fit_transform(X_train[continous_features]),
        columns=continous_features,
        index=X_train.index,
    )
    non_continuous_train = X_train[
        [col for col in X_train.columns if col not in continous_features]
    ]
    X_train_scaled = pd.concat([scaled_continuous_train, non_continuous_train], axis=1)

    # An integer seed makes the splitter yield the same folds on every use,
    # so a single instance can be shared by both grid searches.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    best_params = {}
    for k in kernels:
        grid_search = GridSearchCV(
            estimator=SVC(kernel=k, random_state=42),
            param_grid=param_grid[k],
            cv=skf,  # stratified folds keep class proportions in every split
            scoring='f1_weighted',  # or 'accuracy'; 'roc_auc' for binary targets
            n_jobs=-1,
            verbose=2,
        )
        grid_search.fit(X_train_scaled, y_train)

        # Report the best parameters found for this kernel.
        print(f"Migliori parametri trovati per {k}:")
        print(grid_search.best_params_)
        best_params[k] = grid_search.best_params_

    print(f"Best params: {best_params}")
    return best_params


# Load the cleaned dataset from the outlier_detection_output folder.
# Alternative input kept for reference: pd.read_excel("merged_files.xlsx") with
# the TECHNOLOGY_GAS, CONTOUR_LASER_MODE, LASER_TYPE and QUALITY_CUT columns dropped.
final_data2 = pd.read_excel('outlier_detection_output/final_data_cleaned.xlsx')

print(f"final_data2 length: {len(final_data2)}")

# Rows whose non-label columns are identical are collapsed to their first
# occurrence; the two label columns are ignored when comparing rows.
label_columns = ('DEFECT_TYPE', 'QUALITY_CUT')
feature_columns = [name for name in final_data2.columns if name not in label_columns]
final_data2 = final_data2.drop_duplicates(subset=feature_columns, keep='first')
final_data2 = final_data2.reset_index(drop=True)
print(f"final_data2 length after cleaning: {len(final_data2)}")
print(final_data2.columns)

# Encode the categorical columns with the project helper
# (presumably one-hot encoding; confirm against utils.flatten_categorical_columns).
categorical_columns = ["NOZZLE_TYPE", "MATERIAL_NAME_TULUS"]
encoded_data = flatten_categorical_columns(
    final_data2,
    categorical_columns,
    only_quality_cut=False,
)
print(encoded_data.columns)

# Stratified split on DEFECT_TYPE; the returned frames still carry the target
# column, which is used for the class-count report and then removed.
X_train, X_test, y_train, y_test = split_data(encoded_data, only_quality_cut=False)

train_class_counts = X_train["DEFECT_TYPE"].value_counts()
print(f'Train classes: \n {train_class_counts}')
test_class_counts = X_test["DEFECT_TYPE"].value_counts()
print(f'\n Test classes: \n {test_class_counts}')

X_train = X_train.drop(columns="DEFECT_TYPE")
X_test = X_test.drop(columns="DEFECT_TYPE")

apply_SVM(X_train, X_test, y_train, y_test)