import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.model_selection import ParameterGrid, cross_val_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import GridSearchCV,StratifiedKFold,RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
from utils import flatten_categorical_columns, reverse_get_dummies


# Best parameters: {'bootstrap': True, 'class_weight': None, 'criterion': 'entropy', 'max_depth': 10, 'max_features': 0.8, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 50}
# Best F1 score: 0.8921355156377127
# Best F1 score on the test set: 0.9117410185550731

def split_data(df, only_quality_cut=True, validation=False, random_state=None):
    """Label-encode the target column and return a stratified train/test split.

    Args:
        df: DataFrame containing the feature columns plus ``QUALITY_CUT``
            (read when ``only_quality_cut`` is true) or ``DEFECT_TYPE``
            (read otherwise).
        only_quality_cut: When true, ``QUALITY_CUT`` is the target and the
            ``DEFECT_TYPE`` column is removed from the features (a message
            is printed if that column does not exist). When false,
            ``DEFECT_TYPE`` is the target.
        validation: When true 10% of the rows are held out, otherwise 20%.
        random_state: Forwarded to ``train_test_split``. The default
            ``None`` keeps the previous behaviour of a different shuffle on
            every call; pass an int for a reproducible split.

    Returns:
        ``(X_train, X_test, y_train, y_test)``. The ``y`` arrays hold the
        codes produced by ``LabelEncoder`` for the target column.

    Note:
        The target column itself is NOT removed from the returned feature
        frames; callers have to drop it before fitting a model, otherwise
        the label leaks into the features.
    """
    if only_quality_cut:
        target = df['QUALITY_CUT']
        try:
            # DEFECT_TYPE determines QUALITY_CUT, so it must not be a feature.
            df = df.drop(columns=['DEFECT_TYPE'])
        except KeyError:
            print("Colonna 'DEFECT_TYPE' non trovata nel DataFrame.")
    else:
        target = df['DEFECT_TYPE']

    y = LabelEncoder().fit_transform(target)

    test_size = 0.1 if validation else 0.2
    X_train, X_test, y_train, y_test = train_test_split(
        df, y, test_size=test_size, stratify=y, random_state=random_state
    )
    return X_train, X_test, y_train, y_test


# Keys of the per-class entries read from ``classification_report``.
_RF_CLASSES = ('0', '1')
_RF_METRICS = ('precision', 'recall', 'f1-score', 'support')

# Hyper-parameters of the forest fitted in every evaluation run; they equal
# the "best parameters" configuration recorded in this file's header comment.
_RF_PARAMS = {
    'bootstrap': True,
    'class_weight': None,
    'criterion': 'entropy',
    'max_depth': 10,
    'max_features': 0.8,
    'max_leaf_nodes': None,
    'max_samples': None,
    'min_impurity_decrease': 0.0,
    'min_samples_leaf': 2,
    'min_samples_split': 5,
    'n_estimators': 50,
}


def _empty_report():
    """Return a zeroed per-class, per-metric accumulator."""
    return {cls: {m: 0.0 for m in _RF_METRICS} for cls in _RF_CLASSES}


def _add_report(totals, report):
    """Add one ``classification_report`` dict into ``totals`` in place."""
    for cls in _RF_CLASSES:
        # A class missing from the report contributes 0.0 to every metric.
        per_class = report.get(cls, {})
        for m in _RF_METRICS:
            totals[cls][m] += per_class.get(m, 0.0)


def _print_mean_report(title, totals, n_runs):
    """Print ``title`` followed by one line of run-averaged metrics per class."""
    print(title)
    for cls in _RF_CLASSES:
        fields = []
        for m in _RF_METRICS:
            mean = totals[cls][m] / n_runs
            if m != 'support':
                fields.append(f'{m}: {mean:.2f} , ')
            else:
                # Support is shown as a whole number (mean truncated).
                fields.append(f'{m}: {int(mean)}')
        print(f'class: {cls} , ' + ''.join(fields))


def apply_random_forest(X, X_train, X_test, y_train, y_test, n_runs=100):
    """Fit a random forest on repeated random splits and print mean metrics.

    For each of ``n_runs`` repetitions the function draws a fresh stratified
    split of ``X`` via ``split_data(X, only_quality_cut=True)``, drops the
    ``QUALITY_CUT`` column from the features, fits a
    ``RandomForestClassifier`` with the fixed ``_RF_PARAMS`` and accumulates
    the per-class precision / recall / f1-score / support on both the test
    and the training part. The averages over all runs are printed.

    Args:
        X: Full DataFrame (features plus ``QUALITY_CUT``) that is re-split
            on every run.
        X_train, X_test, y_train, y_test: Accepted for backward
            compatibility only. They are not used: every run works on its
            own split of ``X``.
        n_runs: Number of split/fit/evaluate repetitions (default 100, the
            previously hard-coded value).

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError("n_runs must be at least 1")

    test_totals = _empty_report()
    train_totals = _empty_report()

    for _ in range(n_runs):
        run_X_train, run_X_test, run_y_train, run_y_test = split_data(
            X, only_quality_cut=True
        )
        # split_data leaves the target column in the frames; remove it so
        # the label is not used as a feature.
        run_X_train = run_X_train.drop(columns=["QUALITY_CUT"])
        run_X_test = run_X_test.drop(columns=["QUALITY_CUT"])

        rf_model = RandomForestClassifier(**_RF_PARAMS)
        rf_model.fit(run_X_train, run_y_train)

        train_pred = rf_model.predict(run_X_train)
        test_pred = rf_model.predict(run_X_test)

        _add_report(
            test_totals,
            classification_report(
                run_y_test, test_pred, output_dict=True, zero_division=0.0
            ),
        )
        _add_report(
            train_totals,
            classification_report(
                run_y_train, train_pred, output_dict=True, zero_division=0.0
            ),
        )

    # Sample counts are those of the last split drawn in the loop.
    print(f"\nNumber of training samples:{len(run_X_train)}")
    print(f"Number of test samples:{len(run_X_test)}")
    _print_mean_report(
        "Random Forest Test Mean Class Accuracy: \n ", test_totals, n_runs
    )
    _print_mean_report(
        "\nRandom Forest Train Mean Class Accuracy: \n ", train_totals, n_runs
    )


# Load the cleaned dataset and shuffle its rows, renumbering the index.
final_data = pd.read_excel("outlier_detection_output/final_data_cleaned.xlsx")
final_data = final_data.sample(frac=1).reset_index(drop=True)

# Binary target: 'Good' when DEFECT_TYPE is 'No Defects', 'Bad' for anything else.
final_data['QUALITY_CUT'] = final_data['DEFECT_TYPE'].apply(
    lambda defect: 'Bad' if defect != 'No Defects' else 'Good'
)

# Replace each categorical column with integer codes, one encoder per column.
for _categorical in ('NOZZLE_TYPE', 'MATERIAL_NAME_TULUS'):
    final_data[_categorical] = LabelEncoder().fit_transform(final_data[_categorical])

# Stratified split holding out 10% of the rows; split_data keeps the target
# column in the returned frames, so it is dropped here.
X_train, X_test, y_train, y_test = split_data(
    final_data, only_quality_cut=True, validation=True
)
X_train = X_train.drop(columns=["QUALITY_CUT"])
X_test = X_test.drop(columns=["QUALITY_CUT"])

# Run the repeated random-forest evaluation on the full dataset.
apply_random_forest(final_data, X_train, X_test, y_train, y_test)