from calendar import c
from statistics import correlation
import numpy as np
import pandas as pd

from rt1.rtfits import Fits
import rt1.volume as rt_V
import rt1.surface as rt_SRF

from scipy.stats import pearsonr
import dask
from dask import delayed

import os
import sys
import time
from datetime import datetime, timedelta
import math

import ast
import rasterio
import matplotlib as mpl
import matplotlib.pyplot as plt
import geopandas as gpd

from clean_geo_scripts import get_aoi_boundaries, get_target_grid_boundaries, generate_target_grid, generate_geo_dataset


#######################################################################################################################################
# Function to create a tuple of (x_UTM, y_UTM)
def create_coords_column(row):
    """Return the coordinates stored in ``row`` as an ``(x_UTM, y_UTM)`` tuple.

    ``row`` can be anything indexable by the keys ``'x_UTM'`` and ``'y_UTM'``
    (for instance one row of a DataFrame).
    """
    return tuple(row[key] for key in ('x_UTM', 'y_UTM'))
#######################################################################################################################################


#######################################################################################################################################
# Step 1: define the set_V_SRF function that is handed to the RT1 Fits class
def set_V_SRF(N, omega, ts, tau):
    """Create the volume and surface scattering objects used by the fit.

    Parameters
    ----------
    N : fitting parameter, forwarded as ``NormBRDF`` of the surface object
    omega : fitting parameter, forwarded to the volume object
    ts : fitting parameter, forwarded as ``t`` of the surface object
    tau : obtained from the dataset, forwarded to the volume object

    Returns
    -------
    tuple
        ``(V, SRF)``: the volume object followed by the surface object.
    """
    # Volume: Henyey-Greenstein with t=0, which is equivalent to isotropic scattering
    volume = rt_V.HenyeyGreenstein(t=0, omega=omega, tau=tau, ncoefs=20)
    # Surface: Henyey-Greenstein driven by the two fitted parameters ts and N
    surface = rt_SRF.HenyeyGreenstein(NormBRDF=N, t=ts, ncoefs=20)
    return volume, surface
#######################################################################################################################################


#######################################################################################################################################
def get_fitting_results_one_cell(original_one_cell_df):
    """Run the RT1 fit for one grid cell and join the fit results to the cell's input data.

    Parameters
    ----------
    original_one_cell_df : pandas.DataFrame
        All rows of a single cell. The columns accessed here are
        'acquisition_date', 'sigma0', 'incidence_angle' (degrees), 'rel_orbit_nb',
        'x_UTM', 'y_UTM', 'coords', 'lon', 'lat' and 'swvl1'. Every other column
        is handed to the fit untouched. The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        The input columns (without 'sigma0' and 'incidence_angle', one row per
        date) side by side with the columns of ``fit.res_df``; the date is a
        regular column again so results of several cells can be concatenated.

    Raises
    ------
    IndexError
        If ``original_one_cell_df`` has no rows.
    KeyError
        If one of the columns that are dropped or converted is missing.
    """
    if len(original_one_cell_df) == 0:
        # Fail fast: there is nothing to fit, and IndexError is what a lookup of the
        # first row of an empty frame raises.
        raise IndexError('get_fitting_results_one_cell() received a DataFrame without rows')

    ###################### STEP 2A: SETTING UP DATASET ####################################
    # Drop every column the fitting procedure does not need and rename the remaining
    # ones to the naming convention expected by the RT1 Fits class.
    one_cell_df = (
        original_one_cell_df
        .drop(columns=['x_UTM', 'y_UTM', 'coords', 'lon', 'lat', 'swvl1'])
        .rename(columns={'acquisition_date': 'date', 'sigma0': 'sig',
                         'incidence_angle': 'inc', 'rel_orbit_nb': 'omega_dyn'})
    )
    # Convert angle values from degrees to radians
    one_cell_df['inc'] = np.deg2rad(one_cell_df['inc'])
    # The fit dataset is indexed by the acquisition date as datetime64[ns]
    one_cell_df['date'] = pd.to_datetime(one_cell_df['date'])
    one_cell_df = one_cell_df.set_index('date')
    ###################### END OF STEP 2A: SETTING UP DATASET ####################################

    ###################### STEP 2B: SETTING UP PARAMETER SPECIFICATIONS ####################################
    # NOTE(review): entries presumably follow the RT1 defdict layout
    # [fit this parameter?, start value, temporal variability, (lower bounds, upper bounds)],
    # with 'manual' variability grouping by the '<name>_dyn' column (here 'omega_dyn',
    # i.e. the relative orbit number) - confirm against the RT1 documentation.
    defdict = dict(
                   N =      [True, .025, 'index', ([0.01], [0.075])],
                   omega =  [True, .25, 'manual', ([0.01], [0.5])],
                   ts =     [True, .2, None, ([0.01], [0.5])],
                   tau =    [False, 'auxiliary']  # not fitted: taken from the dataset
                   )
    ###################### END OF STEP 2B: SETTING UP PARAMETER SPECIFICATIONS ####################################

    ###################### STEP 2C: INITIATING AN INSTANCE OF FITS CLASS ####################################
    fit = Fits(dataset=one_cell_df, verbose=0,
               sig0=True, dB=False,
               set_V_SRF=set_V_SRF,
               defdict=defdict, int_Q=True,
               lsq_kwargs=dict(verbose=0,
                               max_nfev=300))
    ###################### END OF STEP 2C: INITIATING AN INSTANCE OF FITS CLASS ####################################

    ###################### STEP 3: PERFORMING FIT FOR ONE CELL AND COMBINING FITTING RESULTS WITH INPUT DATA ####################################
    # NOTE(review): bare attribute access kept on purpose; presumably a property that is
    # evaluated for its side effect - confirm against RT1 before removing it.
    fit.model_definition
    fit.performfit(print_progress=False)
    fit_results_df = fit.res_df.rename_axis('date')

    # Bring the input data (x_UTM, y_UTM, swvl1, ...) onto the same date index as the
    # fitting results so both frames can be joined column-wise.
    output_one_cell_df = original_one_cell_df.rename(columns={'acquisition_date': 'date'})
    output_one_cell_df['date'] = pd.to_datetime(output_one_cell_df['date'])
    output_one_cell_df = (
        output_one_cell_df
        .drop_duplicates(subset=['date'])
        .set_index('date')
        .drop(columns=['sigma0', 'incidence_angle'])
    )

    combined_df = pd.concat([output_one_cell_df, fit_results_df], axis=1)
    # Date goes back to being a column so results from different cells concatenate cleanly
    combined_df = combined_df.reset_index()
    ###################### END OF STEP 3: PERFORMING FIT FOR ONE CELL AND COMBINING FITTING RESULTS WITH INPUT DATA ####################################

    return combined_df
#######################################################################################################################################





if __name__ == '__main__':
    # Record the start time (nanoseconds).
    # NOTE(review): t0_ns is not read again anywhere in the visible code, so no elapsed time is ever reported.
    t0_ns = time.time_ns()
    # Base folder: the sections below join their input and output CSV file names onto this path
    folder_path = r'C:\Users\Mario\Desktop\Polito Large Files\Thesis\NetCDF_S1_Caluso\UTM'
    target_window_size = 12 # in days
    # Every section that follows is wrapped in a raw triple-quoted string, so it is evaluated as a
    # bare string and does nothing. Presumably a section is switched on by putting '#' in front of
    # its opening quotes (its closing line already starts with '#').

    ## SECTION (disabled): RT1 FITTING FOR ALL EXTRACTED SAR DATA
    # Builds one dask-delayed get_fitting_results_one_cell task per unique (x_UTM, y_UTM) pair,
    # computes them, and writes the concatenated results to a CSV in folder_path.
    # NOTE(review): the CSV is written with index=True; presumably that is the origin of the
    # 'Unnamed: 0' column that later sections drop - confirm.
    r'''
    input_dataset_filename = 'combined_SAR_and_ERA5_data.csv' #'combined_SAR_and_ERA5_data-noduplicates-onlysecondimagedata.csv' #'combined_SAR_and_ERA5_data.csv'
    output_dataset_filename = 'SAR_ERA5_data_with_fitting.csv'
    input_dataset_file_path = os.path.join(folder_path,input_dataset_filename)
    output_dataset_file_path = os.path.join(folder_path,output_dataset_filename)
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(input_dataset_file_path)
    # Apply the function to create the new 'coords' column
    df['coords'] = df.apply(create_coords_column, axis=1)

    # Get unique values in the 'coords' column
    unique_coords = list(df['coords'].unique())
    # Wrap the function with dask.delayed to create a delayed version of it
    delayed_get_fitting_results_one_cell = delayed(get_fitting_results_one_cell)
    # Create delayed tasks for each (x_i, y_i) in unique_coords
    delayed_tasks = [delayed_get_fitting_results_one_cell(df.loc[(df['x_UTM'] == x_i) & (df['y_UTM'] == y_i)]) for x_i, y_i in unique_coords]
    # Compute the delayed tasks in parallel
    results = dask.compute(*delayed_tasks, scheduler='threads')  # You can choose 'threads' or 'processes' as the scheduler

    # Assuming results is a list of pandas DataFrames
    combined_df = pd.concat(results, axis=0, ignore_index=True)
    combined_df.to_csv(output_dataset_file_path, index=True)
    #'''

    ### SECTION (disabled): PEARSON TEST OF N VS swvl1 FOR EVERY GRID CELL (WHOLE PERIOD) + DISTRIBUTION AND MAP PLOTS
    # Reads the fitting-results CSV, computes pearsonr(N, swvl1) per coordinate pair, then plots the
    # histogram of the coefficients and their spatial distribution with the AOI shapefile outline.
    r'''
    shapefile_path = r'C:\Users\Mario\OneDrive - Politecnico di Torino\Mario Chalouhy - Thesis\04-Checking different Radarsat products\CanaleCaluso_agri_areas\CanaleCaluso_agri.shp'
    input_dataset_filename = 'SAR_ERA5_data_with_fitting.csv'
    input_dataset_file_path = os.path.join(folder_path,input_dataset_filename)
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(input_dataset_file_path)
    df['coords'] = df['coords'].apply(ast.literal_eval) #convert 'coords' values from string representations of tuples to actual tuples
    # Get unique values in the 'coords' column
    unique_coords = list(df['coords'].unique())
    # List to store correlation results along with corresponding coordinate pair
    correlation_results_with_coords = []

    # Iterate over each unique coordinate pair
    for coord_pair in unique_coords:
        # Filter DataFrame for the current coordinate pair
        subset_df = df[df['coords'] == coord_pair]
        #print(subset_df[['date','N','swvl1']].head())
        # Extract 'N' and 'swvl1' columns for the current coordinate pair
        N_data = subset_df['N']
        swvl1_data = subset_df['swvl1']
        # Perform Pearson correlation test
        correlation_coefficient, p_value = pearsonr(N_data, swvl1_data)
        # Store correlation result along with corresponding coordinate pair
        correlation_results_with_coords.append((coord_pair, correlation_coefficient, p_value)) #this saves coord_pair as an actual tuple

    # Convert correlation results to DataFrame
    correlation_df = pd.DataFrame(correlation_results_with_coords, columns=['coord_pair', 'correlation_coefficient', 'p_value'])

    # Plot distribution of Pearson correlation coefficient
    correlation_df['correlation_coefficient'].plot(kind='hist', bins=20, density=True)

    plt.xlabel('Pearson correlation coefficient')
    plt.ylabel('Probability density')
    plt.title('Distribution of Pearson correlation coefficient')
    plt.show()

    ##PLOT SPATIAL DISTRIBUTION OF PEARSON CORRELATION COEFFICIENT
    aoi_boundaries = get_aoi_boundaries(print_results=False,wgs84_flag=False)
    target_aoi_boundaries = get_target_grid_boundaries(aoi_boundaries)
    # Argument order (grid boundaries first, then the DataFrame) matches every other generate_geo_dataset call in this file
    geo_dataset = generate_geo_dataset(target_aoi_boundaries, correlation_df)

    fig, ax = plt.subplots(figsize=(10, 8))
    # Plot the map
    geo_dataset['pearson_correlation'].plot(ax=ax)
    # Plot the shapefile
    shapefile = gpd.read_file(shapefile_path)
    shapefile.plot(ax=ax, color='none', edgecolor='black', alpha=0.15)
    plt.show()
    #'''



    #### SECTION (disabled): FORCE THE FITTING RESULTS (OUTLIER CELLS EXCLUDED) ONTO A REGULAR TIMESCALE
    # Per period and per coordinate pair the N series is resampled with custom_resample, and one CSV
    # per period is written to folder_path.
    # NOTE(review): custom_resample is neither defined nor imported in the visible part of this
    # file; it has to be brought into scope before this section is switched on.
    r'''
    #input_dataset_filename = 'SAR_ERA5_data_with_fitting.csv' #file with the 5 outlier cells included
    input_dataset_filename = 'SAR_ERA5_data_w_fitting_outliercellsremoved.csv' #this is the resulting "fitting results" file after removing the 5 outlier cells
    input_dataset_file_path = os.path.join(folder_path,input_dataset_filename)
    #output_dataset_file_path = os.path.join(folder_path,output_dataset_filename)
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(input_dataset_file_path)
    df['coords'] = df['coords'].apply(ast.literal_eval) #convert 'coords' values from string representations of tuples to actual tuples
    # Get unique values in the 'coords' column
    unique_coords = list(df['coords'].unique())
    df['date'] = pd.to_datetime(df['date'])
    #print(df.columns) # ['Unnamed: 0', 'date', 'x_UTM', 'y_UTM', 'rel_orbit_nb', 'lon', 'lat', 'tau', 'swvl1', 'coords', 'N', 'omega', 'ts']
    df.drop(columns=['Unnamed: 0', 'rel_orbit_nb', 'tau', 'omega', 'ts','swvl1'], inplace=True)
    #print(df.columns) # ['date', 'x_UTM', 'y_UTM', 'lon', 'lat', 'coords', 'N'] <- these are the columns that are left in df

    # Filter the DataFrame for the period 2020-2021
    subset_df_2020_2021 = df[(df['date'] >= '2020-01-01') & (df['date'] <= '2021-12-31')]
    # Filter the DataFrame for the period 2022
    subset_df_2022 = df[(df['date'] >= '2022-01-01') & (df['date'] <= '2022-12-31')]
    period_subsets = [('2021',subset_df_2020_2021), ('2222',subset_df_2022)]

    # The loop variable is named period_df so that the full table in df is not overwritten
    for period_str,period_df in period_subsets:
        # List to store resampled DataFrames for each coordinate pair
        resampled_dfs = []
        # Iterate over each unique coordinate pair
        for coord_pair in unique_coords:
            # Filter DataFrame for the current coordinate pair
            subset_df = period_df[period_df['coords'] == coord_pair]
            subset_df = subset_df.copy()
            # Apply the custom resampling function to the subset of the current coordinate pair
            regular_df = custom_resample(subset_df,window_size=target_window_size)
            ## Debugging aid (inspect the first cell, then stop). Must stay commented out for a full run:
            ## with sys.exit() active nothing below is reached and no CSV is ever written.
            #print(f'resampled_df:\n{regular_df.head()}')
            #print(f'\noriginal_df:\n{subset_df.iloc[:15]}')
            #sys.exit()
            resampled_dfs.append(regular_df)
        # Concatenate resampled DataFrames for all coordinate pairs into a single DataFrame
        final_df = pd.concat(resampled_dfs, ignore_index=True)
        # Print the resulting DataFrame
        print(final_df)
        total_nan_N_column = final_df['N'].isna().sum()
        print(f"\nTotal number of NaN values in the 'N' column for period {period_str}: {total_nan_N_column}")
        final_df.to_csv(os.path.join(folder_path,f'resampled_N_values_{target_window_size}Dwindow_{period_str}.csv'))
    #'''

    ### SECTION (disabled): DERIVE SM TIMESERIES FROM N BASED ON THE RATIO OF MAXIMA
    # Per coordinate pair: s2 = max(N) / max(swvl1); afterwards SM = N / s2 for every row.
    # Two CSVs are written: the full table and a reduced one with date, coordinates and SM only.
    r"""
    #input_dataset_filename = 'SAR_ERA5_data_with_fitting.csv'
    #input_dataset_filename = 'SAR_ERA5_data_w_fitting_outliercellsremoved.csv'
    #input_dataset_filename = 'N_swvl1_resampled_12D_2021.csv'
    input_dataset_filename = 'N_swvl1_resampled_12D_2222.csv'
    input_dataset_file_path = os.path.join(folder_path,input_dataset_filename)
    #full_output_dataset_filename = 'SAR_swvl1_SM_timeseries_resampled2021.csv'
    #main_output_dataset_filename = 'SM_timeseries_from_RT1_resampled2021.csv'
    full_output_dataset_filename = 'SAR_swvl1_SM_timeseries_resampled2222.csv'
    main_output_dataset_filename = 'SM_timeseries_from_RT1_resampled2222.csv'
    full_output_dataset_file_path = os.path.join(folder_path,full_output_dataset_filename)
    main_output_dataset_file_path = os.path.join(folder_path,main_output_dataset_filename)
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(input_dataset_file_path)
    df['coords'] = df['coords'].apply(ast.literal_eval) #convert 'coords' values from string representations of tuples to actual tuples
    # Get unique values in the 'coords' column
    unique_coords = list(df['coords'].unique())
    #print(df['coords'].values[0])                               # (401166.0, 5024068.0)
    #print(type(df['coords'].values[0]))                         # <class 'tuple'>
    ### TESTING: PICK JUST ONE PAIR OF COORDINATES FOR TESTING PURPOSES
    ##I will pick one specific set of coordinates to work with for the rest of this attempt (later, will generalize the code to go through all 510 pairs of coordinates)
    #x_test = 411666.0
    #y_test = 5008568.0
    #unique_coords = [(x_test,y_test)]

    ## List to store all calculated s2 values
    #s2_values = []

    # List to store s2_values with corresponding coord_pairs
    s2_values_with_coords = []
    #correlation_results_with_coords = []

    # Iterate over each unique coordinate pair
    for coord_pair in unique_coords:
        # Filter DataFrame for the current coordinate pair
        subset_df = df[df['coords'] == coord_pair]
        # Extract 'N' and 'swvl1' columns for the current coordinate pair
        N_data = subset_df['N']
        swvl1_data = subset_df['swvl1']
        # Get the maximum value in N_data
        max_N_value = N_data.max()
        max_swvl1_value = swvl1_data.max()
        s2_value = max_N_value/max_swvl1_value
        # Store s2_value with corresponding coord_pair
        s2_values_with_coords.append((coord_pair, s2_value))
        #s2_values.append(s2_value)

    #print(f's2_max : {max(s2_values)}')                     # s2_max : 0.18579569226507597
    #print(f's2_min : {min(s2_values)}')                     # s2_min : 0.10465289086683625
    #print(f's2_mean : {sum(s2_values)/len(s2_values)}')     # s2_mean : 0.14903067239564988

    # Assign s2_values to each row in the DataFrame based on "coord_pair" column
    for coord_pair, s2_value in s2_values_with_coords:
        df.loc[df['coords'] == coord_pair, 's2'] = s2_value

    # Create the "SM" column by dividing "N" column by "s2" column
    df['SM'] = df['N'] / df['s2']
    if 'Unnamed: 0' in df:
        df.drop(columns=['Unnamed: 0'],inplace=True)
    #print(df.columns)
    df.to_csv(full_output_dataset_file_path,index=False)
    main_results_df = df[['date', 'x_UTM', 'y_UTM', 'lon', 'lat', 'SM']]
    print(main_results_df.head())
    main_results_df.to_csv(main_output_dataset_file_path,index=False)
    #"""


    ### SECTION (disabled): PEARSON TEST (N VS swvl1) ON THE IRREGULAR TIMESERIES, PER PERIOD
    # Splits the outlier-free fitting results into 2020-2021 and 2022, computes pearsonr(N, swvl1)
    # per coordinate pair, and writes one correlation CSV per period.
    # NOTE(review): the period labels are '2021' and '2222' (the latter for the 2022 subset); they end
    # up in the output file names that other sections read back, so they have to stay in sync.
    # NOTE(review): the period loop rebinds df, so after it df is the last period subset, not the full table.
    r'''
    #input_dataset_filename = 'SAR_ERA5_data_with_fitting.csv' #file with the 5 outlier cells included
    input_dataset_filename = 'SAR_ERA5_data_w_fitting_outliercellsremoved.csv' #this is the resulting "fitting results" file after removing the 5 outlier cells
    input_dataset_file_path = os.path.join(folder_path,input_dataset_filename)
    #output_dataset_file_path = os.path.join(folder_path,output_dataset_filename)
    # Read the CSV file into a pandas DataFrame
    df = pd.read_csv(input_dataset_file_path)
    df['coords'] = df['coords'].apply(ast.literal_eval) #convert 'coords' values from string representations of tuples to actual tuples
    # Get unique values in the 'coords' column
    unique_coords = list(df['coords'].unique())
    df['date'] = pd.to_datetime(df['date'])
    ## Line below is for the case where we don't want to split our time period into two periods.
    #period_subsets = [('2022', df)]

    # Filter the DataFrame for the period 2020-2021
    subset_df_2020_2021 = df[(df['date'] >= '2020-01-01') & (df['date'] <= '2021-12-31')]
    # Filter the DataFrame for the period 2022
    subset_df_2022 = df[(df['date'] >= '2022-01-01') & (df['date'] <= '2022-12-31')]
    period_subsets = [('2021',subset_df_2020_2021), ('2222',subset_df_2022)]

    for period_str,df in period_subsets:
        # List to store correlation results along with corresponding coordinate pair
        correlation_results_with_coords = []

        # Iterate over each unique coordinate pair
        for coord_pair in unique_coords:
            # Filter DataFrame for the current coordinate pair
            subset_df = df[df['coords'] == coord_pair]
            #print(subset_df[['date','N','swvl1']].head())
            # Extract 'N' and 'swvl1' columns for the current coordinate pair
            N_data = subset_df['N']
            swvl1_data = subset_df['swvl1']

            # Perform Pearson correlation test
            correlation_coefficient, p_value = pearsonr(N_data, swvl1_data)
            # Store correlation result along with corresponding coordinate pair
            correlation_results_with_coords.append((coord_pair, correlation_coefficient, p_value, len(subset_df)))

        # Convert correlation results to DataFrame
        correlation_df = pd.DataFrame(correlation_results_with_coords, columns=['coord_pair', 'correlation_coefficient', 'p_value', 'total_nb_of_rows'])
        correlation_df.to_csv(os.path.join(folder_path,f'correlation_results1_no_outliers_{period_str}.csv'))
    #'''

    ## SECTION (disabled): PLOT ANALYSIS RESULTS - PER PERIOD - IRREGULAR TIMESERIES
    # Overlays the per-period histograms of the correlation coefficient, then maps the coefficient
    # per period with the AOI shapefile outline on top.
    # NOTE(review): the two 12-day-resampled correlation CSVs are read here as well, but their
    # DataFrames are not used within this section.
    r'''
    #correlation_2021_file_path = os.path.join(folder_path,f'correlation_results1_2021.csv')
    #correlation_2222_file_path = os.path.join(folder_path,f'correlation_results1_2222.csv')
    correlation_2021_file_path = os.path.join(folder_path,f'correlation_results1_no_outliers_2021.csv')
    correlation_2222_file_path = os.path.join(folder_path,f'correlation_results1_no_outliers_2222.csv')
    resampled_correlation_2021_file_path = os.path.join(folder_path,'correlation_results1_12dsampling_2021.csv')
    resampled_correlation_2222_file_path = os.path.join(folder_path,'correlation_results1_12dsampling_2222.csv')
    correlation_df_2021 = pd.read_csv(correlation_2021_file_path)
    correlation_df_2222 = pd.read_csv(correlation_2222_file_path)
    resampled_correlation_df_2021 = pd.read_csv(resampled_correlation_2021_file_path)
    resampled_correlation_df_2222 = pd.read_csv(resampled_correlation_2222_file_path)
    correlation_dfs = [('2020-2021',correlation_df_2021),('2022-2022',correlation_df_2222)]
    shapefile_path = r'C:\Users\Mario\OneDrive - Politecnico di Torino\Mario Chalouhy - Thesis\04-Checking different Radarsat products\CanaleCaluso_agri_areas\CanaleCaluso_agri.shp'
    # Read the shapefile
    shapefile = gpd.read_file(shapefile_path)
    ## get target_aoi_boundaries needed for spatial plotting
    aoi_boundaries = get_aoi_boundaries(print_results=False,wgs84_flag=False)
    target_aoi_boundaries = get_target_grid_boundaries(aoi_boundaries)

    for i, (period_str, correlation_df) in enumerate(correlation_dfs):
        # Calculate alpha value dynamically
        alpha_value = 1 - i * 0.5
        # Plot distribution of Pearson correlation coefficient
        correlation_df['correlation_coefficient'].plot(kind='hist', bins=20, density=True, alpha=alpha_value, label=period_str)
    plt.xlabel('Pearson correlation coefficient')
    plt.ylabel('Probability density')
    plt.title(f'Distribution of Pearson correlation coefficient')
    plt.legend()
    plt.show()

    fig, axs = plt.subplots(1, 2, figsize=(20, 8))
    for i, (period_str, correlation_df) in enumerate(correlation_dfs):
            geo_dataset = generate_geo_dataset(target_aoi_boundaries, correlation_df)    
            # Plot the map
            geo_dataset['pearson_correlation'].plot(ax=axs[i])
            shapefile.plot(ax=axs[i], color='none', edgecolor='black', alpha=0.15, label=period_str)
            # Add title
            axs[i].set_title(period_str)
    plt.show()
    #'''



    ### SECTION (disabled): PEARSON TEST (N VS swvl1) ON THE RESAMPLED TIMESERIES, PER PERIOD
    # For each period label reads 'N_swvl1_resampled_12D_<period>.csv', computes pearsonr(N, swvl1)
    # per coordinate pair and writes 'correlation_results1_12dsampling_<period>.csv'.
    r'''
    period_subsets = ['2021','2222']

    for period_str in period_subsets:

        N_swvl1_fn = f'N_swvl1_resampled_12D_{period_str}.csv'
        N_swvl1_data_filepath = os.path.join(folder_path,N_swvl1_fn)
        output_correlation_fn = f'correlation_results1_12dsampling_{period_str}.csv'
        output_correlation_filepath = os.path.join(folder_path,output_correlation_fn)
        df = pd.read_csv(N_swvl1_data_filepath)
        df['coords'] = df['coords'].apply(ast.literal_eval) #convert 'coords' values from string representations of tuples to actual tuples
        # Get unique values in the 'coords' column
        unique_coords = list(df['coords'].unique())
        df['date'] = pd.to_datetime(df['date'])

        # List to store correlation results along with corresponding coordinate pair
        correlation_results_with_coords = []

        # Iterate over each unique coordinate pair
        for coord_pair in unique_coords:
            # Filter DataFrame for the current coordinate pair
            subset_df = df[df['coords'] == coord_pair]
            #print(subset_df[['date','N','swvl1']].head())
            # Extract 'N' and 'swvl1' columns for the current coordinate pair
            N_data = subset_df['N']
            swvl1_data = subset_df['swvl1']

            # Perform Pearson correlation test
            correlation_coefficient, p_value = pearsonr(N_data, swvl1_data)
            # Store correlation result along with corresponding coordinate pair
            correlation_results_with_coords.append((coord_pair, correlation_coefficient, p_value, len(subset_df)))

        # Convert correlation results to DataFrame
        correlation_df = pd.DataFrame(correlation_results_with_coords, columns=['coord_pair', 'correlation_coefficient', 'p_value', 'total_nb_of_rows'])
        correlation_df.to_csv(output_correlation_filepath)
    #'''

    ## SECTION (disabled): PLOT ANALYSIS RESULTS - PER PERIOD - COMPARING ORIGINAL TO RESAMPLED SERIES
    # Per period: overlaid histograms of the correlation coefficient (original vs resampled series),
    # then side-by-side maps of both series with the AOI shapefile outline on top.
    r'''
    #correlation_2021_file_path = os.path.join(folder_path,f'correlation_results1_2021.csv')
    #correlation_2222_file_path = os.path.join(folder_path,f'correlation_results1_2222.csv')
    correlation_2021_file_path = os.path.join(folder_path,f'correlation_results1_no_outliers_2021.csv')
    correlation_2222_file_path = os.path.join(folder_path,f'correlation_results1_no_outliers_2222.csv')
    resampled_correlation_2021_file_path = os.path.join(folder_path,'correlation_results1_12dsampling_2021.csv')
    resampled_correlation_2222_file_path = os.path.join(folder_path,'correlation_results1_12dsampling_2222.csv')
    correlation_df_2021 = pd.read_csv(correlation_2021_file_path)
    correlation_df_2222 = pd.read_csv(correlation_2222_file_path)
    resampled_correlation_df_2021 = pd.read_csv(resampled_correlation_2021_file_path)
    resampled_correlation_df_2222 = pd.read_csv(resampled_correlation_2222_file_path)

    correlation_dfs = [('2020-2021',[('original series',correlation_df_2021),('resampled series',resampled_correlation_df_2021)]),
                      ('2022-2022',[('original series',correlation_df_2222),('resampled series',resampled_correlation_df_2222)])]
    shapefile_path = r'C:\Users\Mario\OneDrive - Politecnico di Torino\Mario Chalouhy - Thesis\04-Checking different Radarsat products\CanaleCaluso_agri_areas\CanaleCaluso_agri.shp'
    # Read the shapefile
    shapefile = gpd.read_file(shapefile_path)
    ## get target_aoi_boundaries needed for spatial plotting
    aoi_boundaries = get_aoi_boundaries(print_results=False,wgs84_flag=False)
    target_aoi_boundaries = get_target_grid_boundaries(aoi_boundaries)

    for period_str, correlation_dfs_per_period in correlation_dfs:
        for i, (series_label,correlation_df) in enumerate(correlation_dfs_per_period):
            # Calculate alpha value dynamically
            alpha_value = 1 - i * 0.5
            # Plot distribution of Pearson correlation coefficient
            correlation_df['correlation_coefficient'].plot(kind='hist', bins=20, density=True, alpha=alpha_value, label=series_label)
        plt.xlabel('Pearson correlation coefficient')
        plt.ylabel('Probability density')
        plt.title(f'Distribution of Pearson correlation coefficient, {period_str}')
        plt.legend()
        plt.show()

    for period_str, correlation_dfs_per_period in correlation_dfs:
        fig, axs = plt.subplots(1, 2, figsize=(20, 8))
        for i, (series_label,correlation_df) in enumerate(correlation_dfs_per_period):
                geo_dataset = generate_geo_dataset(target_aoi_boundaries, correlation_df)    
                # Plot the map
                geo_dataset['pearson_correlation'].plot(ax=axs[i])
                shapefile.plot(ax=axs[i], color='none', edgecolor='black', alpha=0.15)
                # Add title
                axs[i].set_title(series_label)
        plt.show()
    #'''


    ## SECTION (disabled): COMBINE THE CORRELATION RESULTS OF THE IRREGULAR AND RESAMPLED N TIMESERIES, ONE CSV PER PERIOD
    # Relies on correlation_df_2021 / correlation_df_2222 and resampled_correlation_df_2021 /
    # resampled_correlation_df_2222 already being loaded (the plotting sections read them from CSV).
    r'''
    # Creating df's with difference in correlation coefficients (resampled_correlation minus original_correlation)
    # Creating diff df for period 2020-2021
    correlation_comb_df_2021 = resampled_correlation_df_2021.copy()
    correlation_comb_df_2021.rename(columns={'correlation_coefficient': 'correlation_coefficient_resampled',
                        'p_value': 'p_value_resampled',
                        'total_nb_of_rows': 'total_nb_of_rows_resampled'}, inplace=True)
    correlation_comb_df_2021 = pd.merge(correlation_df_2021, correlation_comb_df_2021, on='coord_pair', how='inner')
    correlation_comb_df_2021.rename(columns={'correlation_coefficient': 'correlation_coefficient_original',
                        'p_value': 'p_value_original',
                        'total_nb_of_rows': 'total_nb_of_rows_original'}, inplace=True)
    correlation_comb_df_2021['corr_res_minus_corr_org'] = correlation_comb_df_2021['correlation_coefficient_resampled'] - correlation_comb_df_2021['correlation_coefficient_original']
    correlation_comb_df_2021.to_csv(os.path.join(folder_path,'correlation_combined_df_2021.csv'))

    # Creating diff df for period 2022-2022
    correlation_comb_df_2222 = resampled_correlation_df_2222.copy()
    correlation_comb_df_2222.rename(columns={'correlation_coefficient': 'correlation_coefficient_resampled',
                        'p_value': 'p_value_resampled',
                        'total_nb_of_rows': 'total_nb_of_rows_resampled'}, inplace=True)
    correlation_comb_df_2222 = pd.merge(correlation_df_2222, correlation_comb_df_2222, on='coord_pair', how='inner')
    correlation_comb_df_2222.rename(columns={'correlation_coefficient': 'correlation_coefficient_original',
                        'p_value': 'p_value_original',
                        'total_nb_of_rows': 'total_nb_of_rows_original'}, inplace=True)
    correlation_comb_df_2222['corr_res_minus_corr_org'] = correlation_comb_df_2222['correlation_coefficient_resampled'] - correlation_comb_df_2222['correlation_coefficient_original']
    # The 2022 file must receive the 2022 frame (not the 2020-2021 one)
    correlation_comb_df_2222.to_csv(os.path.join(folder_path,'correlation_combined_df_2222.csv'))
    #'''

    ## SECTION (disabled): MAP THE DIFFERENCE BETWEEN THE RESAMPLED AND IRREGULAR CORRELATION RESULTS
    # Reads the two combined-correlation CSVs and maps 'corr_res_minus_corr_org' per period on a
    # fixed -0.30..0.30 'RdBu' colour scale, with the AOI shapefile outline on top.
    r'''
    shapefile_path = r'C:\Users\Mario\OneDrive - Politecnico di Torino\Mario Chalouhy - Thesis\04-Checking different Radarsat products\CanaleCaluso_agri_areas\CanaleCaluso_agri.shp'
    # Read the shapefile
    shapefile = gpd.read_file(shapefile_path)
    ## get target_aoi_boundaries needed for spatial plotting
    aoi_boundaries = get_aoi_boundaries(print_results=False,wgs84_flag=False)
    target_aoi_boundaries = get_target_grid_boundaries(aoi_boundaries)

    correlation_comb_df_2021 = pd.read_csv(os.path.join(folder_path,'correlation_combined_df_2021.csv'))
    correlation_comb_df_2222 = pd.read_csv(os.path.join(folder_path,'correlation_combined_df_2222.csv'))
    correlation_comb_dfs = [('2020-2021',correlation_comb_df_2021),('2022-2022',correlation_comb_df_2222)]
    fig, axs = plt.subplots(1, 2, figsize=(20, 8))
    vmin, vmax = -0.30, 0.30  # Specify the color scale range
    for i, (period_str, correlation_comb_df) in enumerate(correlation_comb_dfs):
            geo_dataset = generate_geo_dataset(target_aoi_boundaries, correlation_comb_df, data_column_name='corr_res_minus_corr_org', initialize_to_nan=True)    
            # Plot the map
            geo_dataset['pearson_correlation'].plot(ax=axs[i], vmin=vmin, vmax=vmax, cmap='RdBu')
            shapefile.plot(ax=axs[i], color='none', edgecolor='black', alpha=0.15)
            # Add title
            axs[i].set_title(period_str)
    plt.show()
    #'''




