import optuna
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import scipy.io
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
import joblib
from sklearn.preprocessing import StandardScaler

# Global state shared between the GUI callbacks.
# Raw data and the D1 train/test split; everything stays None until a file is loaded.
X, y, X_train, X_test, y_train, y_test = None, None, None, None, None, None
# D2 hold-out set used for the final validation; assigned by load_file_data.
X_val, y_val = None, None
# Final trained Keras model; assigned by build_and_train_best_model.
model = None
scaler_X = StandardScaler()  # Standardize the features


# Function to load the vectors from a .mat file and split them into
# training, test and validation sets
def load_file_data():
    """Ask the user for a .mat file, then build, split and scale the dataset.

    The file must contain the variables ``vector_3dB_cleaned``,
    ``vector_5dB_cleaned``, ``vector_10dB_cleaned`` and
    ``vector_equiv_cleaned`` (stacked column-wise into the feature matrix) and
    ``Lmax_all_vector_cleaned`` (the regression target, which is left
    unscaled).

    The samples are split 50/50 into D1 and D2. D1 is split again 80/20 into a
    training and a test set; D2 is kept as a validation set. The feature
    scaler is fitted on the training rows only and then applied to the test
    and validation rows, so no statistics of held-out data leak into the
    preprocessing.

    Results are published through the module globals ``X``, ``y``,
    ``X_train``, ``X_test``, ``y_train``, ``y_test``, ``X_val``, ``y_val`` and
    ``scaler_X``. They are only updated once every step has succeeded, so a
    failed load leaves any previously loaded dataset intact. The outcome is
    reported to the user through a message box; nothing is returned.
    """
    global X, y, X_train, X_test, y_train, y_test, X_val, y_val, scaler_X
    file_path = filedialog.askopenfilename(filetypes=[("MAT files", "*.mat")])
    if not file_path:
        messagebox.showerror("Error", "Please select a .mat file")
        return

    try:
        mat = scipy.io.loadmat(file_path)

        # Stack the input vectors horizontally to form the feature matrix
        feature_keys = ('vector_3dB_cleaned', 'vector_5dB_cleaned',
                        'vector_10dB_cleaned', 'vector_equiv_cleaned')
        features = np.column_stack([mat[key] for key in feature_keys])
        target = mat['Lmax_all_vector_cleaned']  # kept in its original units

        # Split the dataset into D1 (50%) and D2 (50%)
        raw_D1, raw_D2, target_D1, target_D2 = train_test_split(
            features, target, test_size=0.5, random_state=42)

        # Further split D1 into training (80%) and testing (20%)
        raw_train, raw_test, target_train, target_test = train_test_split(
            raw_D1, target_D1, test_size=0.2, random_state=42)

        # Fit the scaler on the training rows only; the held-out rows are
        # transformed with the training statistics to avoid data leakage.
        fitted_scaler = StandardScaler()
        scaled_train = fitted_scaler.fit_transform(raw_train)
        scaled_test = fitted_scaler.transform(raw_test)
        scaled_val = fitted_scaler.transform(raw_D2)
    except Exception as e:
        messagebox.showerror("Error", f"Failed to load the .mat file. {str(e)}")
        return

    # Everything succeeded: publish the new dataset in one go
    X, y = features, target
    X_train, X_test = scaled_train, scaled_test
    y_train, y_test = target_train, target_test
    # D2 will be used for validation later
    X_val, y_val = scaled_val, target_D2
    scaler_X = fitted_scaler

    messagebox.showinfo("Success", "Data loaded and split successfully!")


'''# Function to manually enter vectors
# Updated Function to manually enter and split data
def enter_vectors_manually():
    global X, y, X_train, X_test, y_train, y_test, X_val, y_val, scaler_X, scaler_y

    try:
        vector_3DB = simpledialog.askstring("Input", "Enter the 3dB vector as comma-separated values:")
        vector_5DB = simpledialog.askstring("Input", "Enter the 5dB vector as comma-separated values:")
        vector_10DB = simpledialog.askstring("Input", "Enter the 10dB vector as comma-separated values:")
        vector_equiv = simpledialog.askstring("Input", "Enter the Equivalent Bandwidth vector as comma-separated values:")
        Lmax_vector = simpledialog.askstring("Input", "Enter the Lmax vector as comma-separated values:")

        # Convert the input strings into numpy arrays
        vector_3DB = np.fromstring(vector_3DB, sep=',')
        vector_5DB = np.fromstring(vector_5DB, sep=',')
        vector_10DB = np.fromstring(vector_10DB, sep=',')
        vector_equiv = np.fromstring(vector_equiv, sep=',')
        Lmax_vector = np.fromstring(Lmax_vector, sep=',')

        # Stack the input vectors horizontally to form a feature matrix
        X = np.column_stack((vector_3DB, vector_5DB, vector_10DB, vector_equiv))
        y = Lmax_vector

        # Preprocessing - Normalize both input and output data
        X_scaled = scaler_X.fit_transform(X)
        y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

        # Split the dataset into D1 (50%) and D2 (50%)
        X_D1, X_D2, y_D1, y_D2 = train_test_split(X_scaled, y_scaled, test_size=0.5, random_state=42)

        # Further split D1 into training (80%) and testing (20%)
        X_train, X_test, y_train, y_test = train_test_split(X_D1, y_D1, test_size=0.2, random_state=42)

        # D2 will be used for validation later
        X_val, y_val = X_D2, y_D2

        messagebox.showinfo("Success", "Data entered and split successfully!")
    except Exception as e:
        messagebox.showerror("Error", f"Failed to enter data manually. {str(e)}")
'''


# Function to run the Optuna hyperparameter optimization and then train the final model
def run_optuna(n_trials=1):
    """Tune the network hyperparameters with Optuna, then train the final model.

    Every trial samples the three hidden-layer widths, the dropout rate, the
    learning rate and the L2 strength, and is scored by the mean validation
    loss of a 5-fold cross-validation over the global training set
    (``X_train`` / ``y_train``). A brand-new model and fresh callbacks are
    created for every fold, so no weights or reduced learning rate carry over
    from one fold to the next and each fold is validated on data the model has
    not been trained on.

    After the study finishes, the best parameters are shown in a message box
    and passed to ``build_and_train_best_model``.

    Args:
        n_trials: Number of Optuna trials to run. Defaults to 1, which is what
            the GUI button uses since it calls this function without
            arguments.

    Returns:
        None. Any failure is reported to the user through a message box.
    """
    try:
        if X_train is None or y_train is None:
            messagebox.showerror("Error", "Please load or enter the data first!")
            return

        def build_fold_model(params):
            """Create and compile an untrained network for the given hyperparameters."""
            net = Sequential()
            net.add(Dense(params['n_units_1'], activation='relu', input_shape=(X_train.shape[1],),
                          kernel_regularizer=l2(params['l2_lambda'])))
            net.add(BatchNormalization())
            net.add(Dropout(params['dropout_rate']))

            net.add(Dense(params['n_units_2'], activation='relu',
                          kernel_regularizer=l2(params['l2_lambda'])))
            net.add(BatchNormalization())
            net.add(Dropout(params['dropout_rate']))

            net.add(Dense(params['n_units_3'], activation='relu',
                          kernel_regularizer=l2(params['l2_lambda'])))
            net.add(BatchNormalization())
            net.add(Dense(1, activation='linear'))  # Output layer for regression

            net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=params['learning_rate']),
                        loss='mean_squared_error',
                        metrics=['mean_absolute_percentage_error'])
            return net

        # Define the objective function for Optuna optimization
        def objective(trial):
            # Hyperparameters to be tuned (sampled once per trial, shared by all folds)
            params = {
                'n_units_1': trial.suggest_int('n_units_1', 128, 512, step=64),  # Layer 1 neurons
                'n_units_2': trial.suggest_int('n_units_2', 64, 256, step=64),  # Layer 2 neurons
                'n_units_3': trial.suggest_int('n_units_3', 32, 128, step=32),  # Layer 3 neurons
                'dropout_rate': trial.suggest_float('dropout_rate', 0.1, 0.3, step=0.05),
                'learning_rate': trial.suggest_float('learning_rate', 1e-5, 5e-4, log=True),
                'l2_lambda': trial.suggest_float('l2_lambda', 1e-6, 1e-4, log=True),  # L2 regularization
            }

            # Cross-validation setup (KFold)
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            fold_losses = []

            for train_idx, val_idx in kf.split(X_train):
                X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
                y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

                # Start every fold from scratch: reusing one model would let it
                # see this fold's validation rows during an earlier fold.
                fold_model = build_fold_model(params)
                fold_callbacks = [
                    EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
                    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6, verbose=1),
                ]

                fold_model.fit(X_train_fold, y_train_fold, epochs=200, batch_size=32,
                               validation_data=(X_val_fold, y_val_fold),
                               callbacks=fold_callbacks, verbose=0)

                loss, _ = fold_model.evaluate(X_val_fold, y_val_fold, verbose=0)
                fold_losses.append(loss)

            return float(np.mean(fold_losses))

        # Set up the Optuna study and optimize the objective function
        study = optuna.create_study(direction='minimize')
        study.optimize(objective, n_trials=n_trials)

        # Get the best trial
        best_trial = study.best_trial
        messagebox.showinfo("Optimization Completed", f'Best trial: {best_trial.params}')

        # Train the final model based on the best trial
        build_and_train_best_model(best_trial.params)

    except Exception as e:
        messagebox.showerror("Error", f"Failed to run optimization. {str(e)}")


# Function to build and train the best model
def build_and_train_best_model(best_params):
    """Build, train, evaluate and save the network for the tuned hyperparameters.

    The network is stored in the module global ``model``. It is trained on the
    global training set with the last 20% of it held out for early stopping,
    then evaluated on the test set (20% of D1) and on the validation set (D2).
    Both results are shown in message boxes. The model is saved to
    ``my_trained_model_30.keras``, the feature scaler to ``scaler_X.pkl``, and
    the diagnostic plots are drawn through ``plot_results``.

    Args:
        best_params: Mapping with the keys ``n_units_1``, ``n_units_2``,
            ``n_units_3``, ``dropout_rate``, ``learning_rate`` and
            ``l2_lambda``.

    Returns:
        None.
    """
    global model
    l2_strength = best_params['l2_lambda']
    dropout = best_params['dropout_rate']

    model = Sequential()
    model.add(Dense(best_params['n_units_1'], activation='relu', input_shape=(X_train.shape[1],),
                    kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))

    model.add(Dense(best_params['n_units_2'], activation='relu', kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout))

    model.add(Dense(best_params['n_units_3'], activation='relu', kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='linear'))  # Output layer for regression

    # Compile the best model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=best_params['learning_rate']),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_percentage_error'])

    # Early stopping and learning rate scheduling
    early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6, verbose=1)

    # Train the best model on D1
    history = model.fit(X_train, y_train, epochs=200, batch_size=32, validation_split=0.2,
                        callbacks=[early_stopping, lr_scheduler])

    # Evaluate the optimized model on the test set (20% of D1).
    # Keras reports MAPE already as a percentage (it multiplies by 100), so the
    # value is shown as-is; dividing by 100 again would understate the error.
    loss, mape = model.evaluate(X_test, y_test)
    messagebox.showinfo("Results",
                        f"Optimized Test Loss: {loss}\nOptimized Test MAPE: {mape:.2f}%\nTest Accuracy: {100 - mape:.2f}%")

    # Validate the model on D2 (remaining 50% of the dataset)
    val_loss, val_mape = model.evaluate(X_val, y_val)
    messagebox.showinfo("Validation Results",
                        f"Validation Loss (D2): {val_loss}\nValidation MAPE: {val_mape:.2f}%\nValidation Accuracy: {100 - val_mape:.2f}%")

    # Save the trained model and the scaler needed to preprocess future inputs
    model.save("my_trained_model_30.keras")
    joblib.dump(scaler_X, "scaler_X.pkl")

    # Plot results
    plot_results(history)


# Function to plot results
# The target is not scaled, so there is no scaler_y: the actual y values are used directly.
def _plot_predicted_vs_actual(actual, predicted, color, label, title, xlabel, ylabel):
    """Scatter predicted against actual values with a y = x reference line."""
    plt.figure(figsize=(10, 6))
    plt.scatter(actual, predicted, color=color, label=label)
    if actual.size:
        # min/max are undefined for an empty set (e.g. every point filtered out)
        lowest, highest = actual.min(), actual.max()
        plt.plot([lowest, highest], [lowest, highest], color='red', label='Perfect Prediction')
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.grid(True)
    plt.show()


def _print_ranked_differences(heading, order, actual, predicted, differences):
    """Print one actual/predicted/difference line per index in ``order``."""
    print(heading)
    for idx in order:
        print(f"Actual: {actual[idx]}, Predicted: {predicted[idx]}, Difference: {differences[idx]}")


def plot_results(history, top_n=100):
    """Show the diagnostic plots for the trained model and print its extreme errors.

    Uses the module globals ``model``, ``X_test`` and ``y_test``. In order it
    shows: the training/validation loss curves, predicted-vs-actual scatter
    plots with and without points whose relative error is 20% or more, the
    empirical CDF of the absolute error, the share of predictions within
    1/5/10 m, and the relative error against the actual value. It then prints
    the single worst prediction followed by the ``top_n`` largest and smallest
    absolute differences.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            hold the ``loss`` and ``val_loss`` series.
        top_n: How many of the largest and of the smallest differences to
            print. Capped at the number of test samples.

    Returns:
        None.
    """
    # Plot 1: Training and Validation Loss over Epochs
    plt.figure(figsize=(10, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Loss (Mean Squared Error)')
    plt.legend()
    plt.grid(True)
    plt.show()

    # y is not scaled, so predictions and targets are compared directly.
    # Both are flattened to 1-D so they pair up element-wise whatever the
    # column/row layout of the stored target is.
    actual = np.asarray(y_test).reshape(-1)
    predicted = np.asarray(model.predict(X_test)).reshape(-1)

    # The absolute error feeds the ECDF, the error-margin bars and the rankings
    abs_errors = np.abs(actual - predicted)
    # A zero target yields inf/nan here; such points never pass the threshold
    # below and are therefore treated as outliers.
    with np.errstate(divide='ignore', invalid='ignore'):
        relative_errors = abs_errors / np.abs(actual)
    outlier_threshold = 20  # 20% difference allowed
    within_threshold = relative_errors * 100 < outlier_threshold

    # Plot with outliers
    _plot_predicted_vs_actual(actual, predicted, 'green',
                              'Predicted vs Actual (With Outliers)',
                              'Predicted vs Actual Lmax (With Outliers)',
                              'Actual Lmax', 'Predicted Lmax')

    # Plot without the outliers
    _plot_predicted_vs_actual(actual[within_threshold], predicted[within_threshold], 'blue',
                              'Predicted vs Actual (Without Outliers)',
                              'Predicted vs Actual Lmax (Without Outliers)',
                              'Actual Lmax (m)', 'Predicted Lmax (m)')

    # Empirical CDF of the absolute error
    sorted_errors = np.sort(abs_errors)
    cumulative_probabilities = np.arange(1, sorted_errors.size + 1) / sorted_errors.size

    plt.figure(figsize=(8, 6))
    plt.plot(sorted_errors, cumulative_probabilities, marker='.', linestyle='none')
    plt.xlabel('Error (|Actual Lmax - Predicted Lmax|) (m)')
    plt.ylabel('Cumulative Probability')
    plt.title('Empirical Cumulative Distribution function (ECDF) of Errors between Actual and Predicted Lmax')
    plt.grid(True)
    plt.show()

    # Share of predictions within fixed absolute error margins
    abs_error_thresholds = [1, 5, 10]  # 1m, 5m, 10m
    error_percentages = [(abs_errors <= margin).mean() * 100 for margin in abs_error_thresholds]

    plt.figure(figsize=(10, 6))
    plt.bar([f"<={margin}m" for margin in abs_error_thresholds], error_percentages, color='skyblue')
    plt.title('Percentage of Predictions within Error Margins')
    plt.xlabel('Error Margin (m)')
    plt.ylabel('Percentage of Predictions (%)')
    plt.grid(True)
    plt.show()

    # Plot relative error
    plt.figure(figsize=(10, 6))
    plt.scatter(actual, relative_errors, color='purple', alpha=0.5)
    plt.axhline(y=0.1, color='red', linestyle='--', label='10% Relative Error')
    plt.axhline(y=0.05, color='green', linestyle='--', label='5% Relative Error')
    plt.title('Relative Error vs Actual Lmax')
    plt.xlabel('Actual Lmax (m)')
    plt.ylabel('Relative Error')
    plt.legend()
    plt.grid(True)
    plt.show()

    if abs_errors.size == 0:
        # Nothing to rank (argmax would raise on an empty array)
        return

    # Print details about the largest difference (likely the outlier)
    max_diff_index = np.argmax(abs_errors)
    print("Outlier with the largest difference:")
    print(f"Actual Lmax: {actual[max_diff_index]}")
    print(f"Predicted Lmax: {predicted[max_diff_index]}")
    print(f"Difference: {abs_errors[max_diff_index]}")

    # Rank every sample by absolute error; never ask for more rows than exist
    ranking = np.argsort(abs_errors)
    shown = max(0, min(top_n, ranking.size))

    # Largest differences, worst prediction first
    _print_ranked_differences(
        f"Top {shown} largest differences between actual and predicted Lmax:",
        ranking[::-1][:shown], actual, predicted, abs_errors)

    # Smallest differences, best prediction first
    _print_ranked_differences(
        f"\nTop {shown} smallest differences between actual and predicted Lmax:",
        ranking[:shown], actual, predicted, abs_errors)


# Build the main application window
root = tk.Tk()
root.title("Fiber Reach Prediction")

# First step of the workflow: pick the .mat file holding the vectors
file_button = tk.Button(master=root, text="Upload .mat File", command=load_file_data)
file_button.pack(pady=5)

# Second step: tune the hyperparameters and train the final model
optimize_button = tk.Button(master=root, text="Run Optimization", command=run_optuna)
optimize_button.pack(pady=20)

# Hand control to the Tk event loop; this call blocks until the window is closed
root.mainloop()

'''
tk.Label(root, text="Choose how to provide the vectors:").pack(pady=10)
'''
'''
manual_button = tk.Button(root, text="Enter Vectors Manually", command=enter_vectors_manually)
manual_button.pack(pady=5)
'''
