import pandas as pd
import os
import shutil
import matplotlib.pyplot as plt
import json
from scipy.signal import savgol_filter

# Input CSV with the sensor readings, and the directory that is scanned for
# (and populated with) the per-plant 'Plant_<id>' folders.
filename = 'DataSerraGrugliasco-Labeled.csv'
base_directory = '../'

# 'Date' is parsed to datetime at load time so it can later be compared,
# sorted and resampled.
# NOTE(review): an earlier revision of this script parsed the column with the
# explicit format '%d/%m/%Y %H:%M'. If the CSV really stores day-first dates,
# confirm that read_csv does not misread them (e.g. pass dayfirst=True).
data = pd.read_csv(filename, parse_dates=['Date'])
print(data.head())

# Reading config file
config_filename = 'config.json'
# The context manager closes the handle as soon as parsing finishes, even if
# json.load raises. The name cfp stays bound to the (closed) file object, and
# calling close() on it again is a harmless no-op.
with open(config_filename, 'r') as cfp:
    config = json.load(cfp)

# Clean all directories: remove every directory directly inside base_directory
# whose name starts with 'Plant'.
with os.scandir(base_directory) as entries:
    # Collect the targets first so nothing is deleted while the directory
    # iterator is still being consumed. entry.path already joins the scanned
    # directory with the entry name, so it is correct whether or not
    # base_directory ends with a path separator.
    stale_directories = [
        entry.path
        for entry in entries
        if entry.name.startswith('Plant') and entry.is_dir()
    ]

for stale_directory in stale_directories:
    shutil.rmtree(stale_directory)

# Function to resample to daily values and apply a moving average
def apply_moving_average(df, window_size=3):
    """Resample readings to daily means and smooth the sensor columns.

    Steps, in order:
      1. Drop rows whose 'Date' duplicates an earlier row (first one is kept).
      2. Index by 'Date' and average the numeric columns per calendar day.
      3. Fill days without readings by time-weighted linear interpolation.
      4. Replace 'Watermark', 'Impedance' and 'Status' (those that exist)
         with a centered rolling mean over ``window_size`` days; at the
         edges the mean is taken over whatever days are available.

    Args:
        df: DataFrame with a datetime 'Date' column. It is not modified.
        window_size: Width, in days, of the centered rolling window.

    Returns:
        A new DataFrame with one row per day, 'Date' as the first column
        followed by the numeric columns of ``df``. Non-numeric columns are
        not included, since they cannot be averaged.
    """
    daily = (
        df.drop_duplicates(subset='Date')  # Remove duplicates
        .set_index('Date')
        .resample('D')
        # numeric_only=True: without it, pandas >= 2.0 raises TypeError as
        # soon as the frame carries a text column.
        .mean(numeric_only=True)
        .interpolate(method='time')  # Fill the days that had no readings
    )

    for column in ('Watermark', 'Impedance', 'Status'):
        if column in daily.columns:
            daily[column] = (
                daily[column]
                .rolling(window=window_size, min_periods=1, center=True)
                .mean()
            )

    return daily.reset_index()

# Global filters applied to every plant before the per-plant config limits.
# NOTE(review): the upper date bound is compared as a timestamp at midnight,
# so readings later on 2024-04-08 are excluded - confirm this is intended.
date_from = '2024-03-08'
date_to = '2024-04-08'
min_impedance = 12000

# One pass per plant id: filter its readings, smooth them, export a CSV and
# save a plot, all inside <base_directory>/Plant_<id>/.
for plant_id in sorted(data['id'].unique()):

    # Check and make subdirs
    subdirectory = os.path.join(base_directory, f'Plant_{plant_id}')
    os.makedirs(subdirectory, exist_ok=True)

    # Define the path for saving the subset CSV file
    csv_file_path = os.path.join(subdirectory, 'data_export.csv')

    # Define subsets by id, in chronological order. sort_values returns a new
    # frame, so the selection taken from `data` is never modified in place.
    data_subset = data.loc[data['id'] == plant_id].sort_values('Date')

    # Filter values
    in_window = (data_subset['Date'] >= date_from) & (data_subset['Date'] <= date_to)
    data_subset = data_subset.loc[in_window]
    data_subset = data_subset.loc[data_subset['Impedance'] >= min_impedance]

    # Per-plant limits from the config: 'low' maps a column name to its
    # minimum accepted value, 'high' to its maximum. A plant without an entry
    # gets an empty one, so it shows up in the config written at the end.
    plant_config = config.setdefault(str(plant_id), {})
    for feature_key, feature_value in plant_config.get('low', {}).items():
        data_subset = data_subset.loc[data_subset[feature_key] >= feature_value]
    if 'high' in plant_config:
        print(plant_config["high"])
        for feature_key, feature_value in plant_config['high'].items():
            data_subset = data_subset.loc[data_subset[feature_key] <= feature_value]

    # Interpolate the data
    data_subset = apply_moving_average(data_subset)

    # Save the filtered subset to a CSV file
    data_subset.to_csv(csv_file_path, index=False)
    print(f"Data for ID {plant_id} saved to {csv_file_path}")

    # Draw the plot: sensor readings on the left axis, Status on a twin right
    # axis because it is plotted on its own scale.
    fig, ax1 = plt.subplots(figsize=(16, 9))
    for feature in ['Watermark', 'Impedance']:
        ax1.plot(data_subset['Date'], data_subset[feature], label=feature)
    ax1.set_xlabel('Date')  # Add an x-label to the axes.
    ax1.set_title(f'Plant {plant_id}')  # Add a title to the axes.

    ax2 = ax1.twinx()
    # The twin axis restarts the default colour cycle, so without an explicit
    # colour Status would be drawn in the same colour as Watermark.
    ax2.plot(data_subset['Date'], data_subset['Status'], label='Status', color='tab:green')

    # A legend only lists the lines of its own axes; merge both axes' entries
    # so Status is listed too. It is attached to ax2, which is drawn on top.
    sensor_handles, sensor_labels = ax1.get_legend_handles_labels()
    status_handles, status_labels = ax2.get_legend_handles_labels()
    ax2.legend(sensor_handles + status_handles, sensor_labels + status_labels)

    # Define the path for saving the subset plot file
    plot_file_path = os.path.join(subdirectory, f'Plant_{plant_id}.pdf')
    # Save through the figure object instead of relying on pyplot's notion of
    # the "current" figure. The figure is left open so the final plt.show()
    # can still display it.
    fig.savefig(plot_file_path)
    
# Release the read handle on the config file before rewriting the file
# (calling close() on an already closed file is a no-op).
cfp.close()

# Write the config back, including the empty entries added for plants that
# had none. The context manager guarantees the data is flushed and the handle
# closed instead of leaving that to garbage collection.
with open(config_filename, 'w') as config_out:
    json.dump(config, config_out, indent=2)

# Display every figure created above.
plt.show()

