import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from openpyxl import Workbook, load_workbook
from natsort import natsorted
import numpy as np

# Root folder of the analysis: one sub-folder per group, each expected to hold
# a 'csv' directory with the '*_tracks.csv' files read by the loop below.
image_folder = os.path.join(os.getcwd(), 'Gruppi')

# Per-group factor applied to the track measurements in the loop below
# ("Track duration" is multiplied by it, summary speeds are divided by it).
# NOTE(review): the plot labels say "min", so this presumably converts frames
# to minutes -- confirm the units against the acquisition settings.
frame_intervals = {
    "G1": 10,
    "Gruppi 2": 20,
    "Gruppi 4": 40,
    "Gruppi 6": 60,
    "Gruppi 9": 90,
}

# Groups are processed in the order in which they are declared above.
gruppi = list(frame_intervals)

# Columns used by both the per-file figures and the per-group summary.
required_columns = ["Track duration", "Track mean speed", "Confinement ratio",
                    "Linearity of forward progression", "Total distance traveled"]

# The plotting theme never changes, so it is applied once up front.
sns.set(style="whitegrid")

# For every group:
#   1. write the describe() table of each '*_tracks.csv' file to one sheet of
#      'Analisi_Complessiva.xlsx' and save a 2x2 figure per file;
#   2. read that workbook back, append the per-sheet mean/std of the required
#      columns to 'Statistica.xlsx' (rows are appended to an existing file, so
#      re-running the script adds the same sheets again);
#   3. save (and show) a 2x2 summary figure 'Finale.png' built from those means.
for gruppo in gruppi:
    current_dir = os.path.join(image_folder, gruppo, 'csv')
    output_path = os.path.join(image_folder, gruppo)
    output_excel_path = os.path.join(output_path, 'Analisi_Complessiva.xlsx')
    os.makedirs(output_path, exist_ok=True)

    # A missing 'csv' directory is handled like an empty one instead of
    # aborting the whole batch with FileNotFoundError.
    if os.path.isdir(current_dir):
        files = natsorted([f for f in os.listdir(current_dir) if f.endswith('_tracks.csv')])
    else:
        files = []
    if not files:
        print(f"Nessun file CSV trovato nella cartella {current_dir}.")
        continue

    # NOTE(review): axis labels treat "Track duration" * interval as minutes;
    # confirm the unit of the values in frame_intervals.
    interval = frame_intervals[gruppo]

    # Workbook collecting one sheet of descriptive statistics per CSV file.
    wb = Workbook()
    wb.active.title = "Indice"

    # All per-file figures of a group go to the same folder.
    image_folder_path = os.path.join(output_path, 'images')
    os.makedirs(image_folder_path, exist_ok=True)

    for file_name in files:
        file_path = os.path.join(current_dir, file_name)
        sheet_name = file_name.replace('_tracks.csv', '')

        # Load the table. The first row after the CSV header supplies the
        # column names, the first three rows are discarded as non-data, and
        # everything is coerced to numbers (non-numeric cells become NaN).
        # NOTE(review): the extra .iloc[1:] drops one more row after those
        # three; if the export has only four header lines this discards the
        # first real track -- confirm against an actual file.
        try:
            data = pd.read_csv(file_path)
            cleaned_data = data.iloc[3:].copy()
            cleaned_data.columns = data.iloc[0]
            cleaned_data = cleaned_data.iloc[1:].apply(pd.to_numeric, errors='coerce')
        except Exception as e:
            # Deliberately broad: one unreadable file must not stop the batch.
            print(f"Errore nel leggere {file_name}: {e}")
            continue

        # Descriptive statistics, written as: header row, then one row per
        # statistic with its label in the first ("Index") column.
        stats = cleaned_data.describe()
        ws = wb.create_sheet(title=sheet_name)
        ws.append(["Index"] + stats.columns.tolist())
        for row in stats.reset_index().values.tolist():
            ws.append(row)

        # The figure needs every required column and at least one row;
        # otherwise skip it (the statistics sheet above is still kept) rather
        # than crash with KeyError / "Cannot cut empty array".
        missing_columns = [col for col in required_columns if col not in cleaned_data.columns]
        if missing_columns:
            print(f"Colonne mancanti in {file_name}: {missing_columns}. Grafici non generati.")
            continue
        if cleaned_data.empty:
            print(f"Nessuna traccia in {file_name}. Grafici non generati.")
            continue

        # Distance normalised by the scaled track duration. Computed after
        # describe(), so it is not part of the Excel statistics.
        scaled_duration = cleaned_data["Track duration"] * interval
        cleaned_data["Normalized Total Distance"] = cleaned_data["Total distance traveled"] / scaled_duration

        plt.figure(figsize=(18, 12))

        # Histogram of the scaled track durations.
        plt.subplot(2, 2, 1)
        sns.histplot(scaled_duration, bins=30, kde=True, color="blue")
        plt.title("Distribuzione della Durata delle Tracce")
        plt.xlabel("Durata (min)")
        plt.ylabel("Frequenza")

        # Mean speed against scaled duration.
        plt.subplot(2, 2, 2)
        sns.scatterplot(x=scaled_duration, y=cleaned_data["Track mean speed"], alpha=0.6, color="green")
        plt.title("Velocità Media vs Durata della Traccia")
        plt.xlabel("Durata (min)")
        plt.ylabel("Velocità Media")

        # Normalised distance, grouped into four equal-width confinement bins.
        plt.subplot(2, 2, 3)
        sns.boxplot(x=pd.cut(cleaned_data["Confinement ratio"], bins=4, labels=["Basso", "Medio", "Alto", "Molto Alto"]),
                    y=cleaned_data["Normalized Total Distance"], palette="Set2")
        plt.title("Distanza Normalizzata Percorsa per Rapporto di Confinamento")

        # Confinement ratio against linearity of forward progression.
        plt.subplot(2, 2, 4)
        sns.scatterplot(x=cleaned_data["Confinement ratio"], y=cleaned_data["Linearity of forward progression"], alpha=0.6, color="purple")
        plt.title("Confinamento vs Linearità della Progressione")

        plt.tight_layout()
        fig_path = os.path.join(image_folder_path, f"{sheet_name}.png")
        plt.savefig(fig_path, dpi=300, bbox_inches='tight')
        plt.close()

    wb.save(output_excel_path)
    print(f"Analisi completata per {gruppo}. File salvato in: {output_excel_path}")

    # Read every sheet of the workbook just written (dict: sheet name -> DataFrame).
    df = pd.read_excel(output_excel_path, sheet_name=None)

    # Open the statistics workbook if it already exists, otherwise start a new one.
    stat_excel_path = os.path.join(output_path, 'Statistica.xlsx')
    if os.path.exists(stat_excel_path):
        stat_wb = load_workbook(stat_excel_path)
    else:
        stat_wb = Workbook()

    # Make sure the "Statistiche" sheet exists.
    if "Statistiche" not in stat_wb.sheetnames:
        stat_wb.create_sheet("Statistiche")
    stat_ws = stat_wb["Statistiche"]

    # Write the header only into a truly empty sheet. max_row is 1 both for an
    # empty sheet and for one that holds just the header, so max_column is
    # checked too (the header spans more than one column).
    if stat_ws.max_row == 1 and stat_ws.max_column == 1:
        stat_ws.append(["Sheet Name"] + [f"Mean {col}" for col in required_columns] + [f"Std {col}" for col in required_columns])

    # Collect the per-sheet mean and standard deviation of each required column.
    mean_samples = {col: [] for col in required_columns}
    std_samples = {col: [] for col in required_columns}
    group_labels = []

    for sheet_name, sheet_df in df.items():
        # Skip sheets without the statistics layout (e.g. the empty "Indice").
        if "Index" not in sheet_df.columns:
            continue
        if not all(col in sheet_df.columns for col in required_columns):
            continue

        # Select the rows by label rather than by position, so a table that
        # is not a numeric describe() output is skipped instead of misread.
        stats_by_label = sheet_df.set_index("Index")
        if "mean" not in stats_by_label.index or "std" not in stats_by_label.index:
            continue
        mean_values = stats_by_label.loc["mean", required_columns]
        std_values = stats_by_label.loc["std", required_columns]

        for col in required_columns:
            mean_samples[col].append(mean_values[col])
            std_samples[col].append(std_values[col])

        group_labels.append(sheet_name)
        stat_ws.append([sheet_name] + mean_values.tolist() + std_values.tolist())

    column_vectors_mean = {col: np.array(values, dtype=float) for col, values in mean_samples.items()}
    column_vectors_std = {col: np.array(values, dtype=float) for col, values in std_samples.items()}

    # Save the updated statistics workbook.
    stat_wb.save(stat_excel_path)
    print(f"File Statistica.xlsx salvato in: {stat_excel_path}")

    # Without any usable sheet there is nothing to plot (pd.cut would raise on
    # an empty array), so skip the summary figure for this group.
    if not group_labels:
        print(f"Nessun foglio valido per {gruppo}: grafico riassuntivo non generato.")
        continue

    # Quantities shared by several panels of the summary figure.
    mean_duration = column_vectors_mean["Track duration"] * interval
    mean_speed = column_vectors_mean["Track mean speed"] / interval
    std_speed = column_vectors_std["Track mean speed"] / interval
    normalized_distance = column_vectors_mean["Total distance traveled"] / mean_duration

    plt.figure(figsize=(18, 12))

    # 1. Histogram of the mean track duration per sheet
    plt.subplot(2, 2, 1)
    sns.histplot(mean_duration, bins=30, kde=True, color="blue")
    plt.title("Track Duration Distribution")
    plt.xlabel("Mean Duration (min)")
    plt.ylabel("Frequency")
    plt.xlim(0, 1000)

    # 2. Mean speed vs mean duration, with the speed std as error bars
    plt.subplot(2, 2, 2)
    plt.errorbar(mean_duration, mean_speed, yerr=std_speed,
                 fmt='o', color='green', alpha=0.6)
    plt.title("Mean Speed vs Track Duration with Error Bars")
    plt.xlabel("Mean Duration (min)")
    plt.ylabel("Mean Speed")
    plt.xlim(0, 1000)

    # 3. Normalised distance grouped into four equal-width confinement bins
    plt.subplot(2, 2, 3)
    sns.boxplot(x=pd.cut(column_vectors_mean["Confinement ratio"], bins=4, labels=["Low", "Medium", "High", "Very High"]),
                y=normalized_distance, palette="Set2")
    plt.title("Total Distance Traveled by Confinement Ratio")
    plt.xlabel("Confinement Ratio")
    plt.ylabel("Total Distance Traveled")

    # 4. Mean speed vs normalised distance, one point per sheet, plus a
    #    regression line over all points
    plt.subplot(2, 2, 4)
    df_plot = pd.DataFrame({
        "Mean Speed": mean_speed,
        "Total Distance Traveled": normalized_distance,
        "Group": group_labels
    })
    print(df_plot.head())

    sns.scatterplot(x="Mean Speed", y="Total Distance Traveled", hue="Group",
                    data=df_plot, legend=False)
    sns.regplot(x="Mean Speed", y="Total Distance Traveled", data=df_plot, scatter=False, color='black')
    plt.xlim(0, 0.4)
    plt.title("Relationship Between Speed and Total Distance Traveled")
    plt.xlabel("Mean Speed")
    plt.ylabel("Total Distance Traveled")

    # Save the summary figure next to the workbooks.
    output_image_path = os.path.join(output_path, 'Finale.png')
    plt.savefig(output_image_path, dpi=300, bbox_inches='tight')

    # Show the figure (on interactive backends this waits until the window is
    # closed), then release it.
    plt.show()

    plt.close()
    print(f"Chart saved at: {output_image_path}")
