import os
import glob
import pandas as pd
import numpy as np
import re
import pytz
import operator

from django.conf import settings
from datetime import datetime, time, timedelta, date
from PPA.models import XUL115
from core.utils import load_SQL_data, save_SQL_data, delete_SQL_table, table_name_from_filepath
from .models import FormFormat, FieldMapping


def import_form_data(form_id, date_naive):
    """Locate the input file of a form definition and run the extraction on it.

    Args:
        form_id: Primary key of the ``FormFormat`` record to use.
        date_naive: Timezone-naive date/datetime, expressed in the time zone
            of the settings file. It is substituted into the form's file
            path template and handed on to the extraction step.

    Nothing is returned. Whatever ``extract_data_from_file`` gives back is
    printed; when no file matches the resolved pattern nothing else happens
    (``open_file_in_excel`` already prints a notice in that case).
    """
    form_format = FormFormat.objects.get(pk=form_id)
    field_mappings = form_format.field_mappings.all()

    # NOTE: assumes the `file` field of FormFormat stores a path template.
    path_pattern = format_path(form_format.file, date_naive)
    located_file = open_file_in_excel(path_pattern)
    if not located_file:
        return

    data = extract_data_from_file(date_naive, located_file, form_format, field_mappings)
    print(data)
def open_file_in_excel(pattern, open=False):
    """Return the first path matching a glob pattern, optionally launching it.

    Args:
        pattern: Glob pattern handed to ``glob.glob``.
        open: When true, the matched file is additionally launched through
            ``os.startfile`` (i.e. with the program associated with it).

    Returns:
        The first entry of the glob result, or ``None`` after printing a
        notice when nothing matches.
    """
    candidates = glob.glob(pattern)
    if not candidates:
        print(f"No file found for pattern: {pattern}")
        return None

    first_match = candidates[0]
    if open:
        os.startfile(first_match)
    return first_match

def extract_data_from_file(date_naive, file_path, form_format, field_mappings):
    """Extract the mapped fields from one input file and store them.

    The sheet contents are cached in the ``temp_data.db`` SQL store under a
    table name derived from ``file_path``: a cached table is reused when
    ``load_SQL_data`` returns one, otherwise the file is read and cached.
    Once the rows have been saved to the target model the cached table is
    deleted again.

    Args:
        date_naive: Timezone-naive datetime in the settings time zone; it is
            localized with ``settings.TIME_ZONE`` for 'date' field mappings.
        file_path: Path of the file to read.
        form_format: Form definition providing ``sheet``, ``mode`` and the
            row limits / search expressions.
        field_mappings: Iterable of field mappings to extract.

    Returns:
        The DataFrame produced by ``extract_table_data`` (the rows that were
        saved).

    Raises:
        ValueError: If ``form_format.mode`` is neither ``'Absolute'`` nor
            ``'Search'``.
    """
    temp_db = 'temp_data.db'
    table_name = table_name_from_filepath(file_path)
    # NOTE(review): the table name is interpolated into the SQL text; this
    # relies on table_name_from_filepath yielding a safe identifier - confirm.
    query = f'SELECT * FROM {table_name}'
    df = load_SQL_data(temp_db, table_name, query)
    if df is None:
        df = read_file_to_df(file_path, form_format.sheet)
        save_SQL_data(df, temp_db, table_name)

    timezone = pytz.timezone(settings.TIME_ZONE)
    date_aware = timezone.localize(date_naive)

    if form_format.mode == 'Absolute':
        start_row = form_format.start_row
        end_row = form_format.end_row
    elif form_format.mode == 'Search':
        start_row = find_row_by_search(df, form_format.start_search)
        # The end marker is searched from the start row onwards.
        end_row = find_row_by_search(df, form_format.end_search, start_row)
    else:
        # Without this branch the row limits would stay unbound and the
        # failure would surface later as an UnboundLocalError.
        raise ValueError(f"Unsupported form mode: {form_format.mode!r}")

    # Extract the date and time from the sheet as a list for indexing
    datetimes = extract_datetime(date_naive, df, form_format, start_row, end_row)

    table_data = extract_table_data(date_aware, datetimes, df, start_row, end_row, field_mappings)
    save_data_to_model(table_data, form_format, field_mappings)
    delete_SQL_table(temp_db, table_name)
    return table_data

def read_file_to_df(file_path, sheet_spec=None):
    """Load an Excel or CSV file into a DataFrame addressed like a spreadsheet.

    The result has a blank first row and columns labelled ``A``, ``B``, ...
    so that ``df.at[row, col]`` can be used with 1-based sheet row numbers
    and Excel column letters.

    Args:
        file_path: Path of the ``.xls``/``.xlsx``/``.csv`` file. The
            extension is matched case-insensitively.
        sheet_spec: For Excel files, either a sheet name or a bracketed
            zero-based index such as ``"[0]"``. When empty or ``None`` the
            first sheet is read. Ignored for CSV files.

    Returns:
        The DataFrame described above; Excel cells are read as strings.

    Raises:
        ValueError: If the file extension is not supported.
    """
    _, file_extension = os.path.splitext(file_path)
    extension_key = file_extension.lower()

    if extension_key in ('.xls', '.xlsx'):
        if not sheet_spec:
            # sheet_name=None would make read_excel return a dict of every
            # sheet; fall back to the first sheet instead.
            sheet_name = 0
        elif sheet_spec.startswith('[') and sheet_spec.endswith(']'):
            sheet_name = int(sheet_spec[1:-1])  # "[2]" -> positional index 2
        else:
            sheet_name = sheet_spec  # Use sheet_spec as the sheet name
        df = pd.read_excel(file_path, sheet_name=sheet_name, dtype=str, header=None)
    elif extension_key == '.csv':
        df = read_variable_csv(file_path)
    else:
        raise ValueError(f"Unknown file extension: {file_extension}")

    # Prepend a row of NaN so that the first data row sits at index 1.
    empty_row = pd.DataFrame([np.nan] * df.shape[1]).T
    df = pd.concat([empty_row, df], ignore_index=True)

    # Rename columns to Excel-style column names
    df.columns = [index_to_col_label(i + 1) for i in range(df.shape[1])]

    return df

def read_variable_csv(file_path, delimiter=','):
    """Read a CSV whose lines may have differing numbers of fields.

    Every physical line of the file becomes one row (blank lines included),
    and short rows are padded with empty strings up to the widest row.
    Leading/trailing whitespace of each line is removed before parsing.

    Args:
        file_path: Path of the UTF-8 encoded file (a leading BOM is dropped).
        delimiter: Field separator. Single-character delimiters are parsed
            with the ``csv`` module, so quoted fields may contain the
            delimiter; longer delimiters fall back to a plain split with the
            surrounding double quotes stripped from each cell.

    Returns:
        A DataFrame of strings with default integer row/column labels.
    """
    import csv  # local import: csv is not imported at module level

    rows = []
    # utf-8-sig reads plain UTF-8 unchanged but discards a byte-order mark,
    # which would otherwise end up inside the first cell.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            text = line.strip()
            if len(delimiter) == 1:
                # Parse line by line so row numbers keep matching the file's
                # line numbers.
                cells = next(csv.reader([text], delimiter=delimiter), [])
            else:
                # csv only accepts one-character delimiters.
                cells = [cell.strip('"') for cell in text.split(delimiter)]
            # A blank line still counts as a row holding one empty cell.
            rows.append(cells or [''])

    # Pad rows with fewer columns than the widest row
    max_columns = max((len(row) for row in rows), default=0)
    for row in rows:
        row.extend([''] * (max_columns - len(row)))

    return pd.DataFrame(rows)

def save_data_to_model(table_data, form_format, field_mappings):
    """Upsert every row of ``table_data`` into the model of the form's table.

    Args:
        table_data: DataFrame whose columns are model field names; it must
            contain a ``time`` column and every index field.
        form_format: Form definition; ``db_table_name`` selects the model.
        field_mappings: Iterable of mappings; those whose ``index_field``
            attribute equals 1 contribute their ``db_field_name`` to the
            lookup key.

    Raises:
        ValueError: If no model is registered for ``db_table_name``.
    """
    # Table name -> Django model class; extend as further tables are added.
    models_by_table = {'xul115': XUL115}
    model_class = models_by_table.get(form_format.db_table_name)
    if not model_class:
        raise ValueError(f"No model found for table {form_format.db_table_name}")

    rows = table_data.to_dict(orient='records')

    # A row is identified by its flagged index fields plus the timestamp.
    key_fields = []
    for mapping in field_mappings:
        if getattr(mapping, 'index_field', None) == 1:
            key_fields.append(mapping.db_field_name)
    key_fields.append('time')

    for row in rows:
        lookup = {name: row[name] for name in key_fields}
        model_class.objects.update_or_create(defaults=row, **lookup)

def extract_table_data(date_aware, datetimes, df_input, start, end, field_mappings):
    """Assemble the output table: a ``time`` column plus one column per mapping.

    Args:
        date_aware: Value used for mappings whose location is ``date``.
        datetimes: Values of the ``time`` column; their count fixes the
            number of rows.
        df_input: Sheet DataFrame addressed as ``df_input.at[row, col]``.
        start: Row substituted for ``start`` in range locations.
        end: Row substituted for ``end`` in range locations.
        field_mappings: Iterable of mappings providing ``field_location``,
            ``operation`` and ``db_field_name``.

    Returns:
        The assembled DataFrame.
    """
    df = pd.DataFrame({'time': datetimes})

    for mapping in field_mappings:
        location = mapping.field_location
        field_type, col, row_start, row_end = extract_address_or_range(location, start, end)

        if field_type == 'range':
            # One value per sheet row of the (inclusive) range.
            data = []
            for row in range(row_start, row_end + 1):
                data.append(df_input.at[row, col])
        elif field_type == 'address':
            # A single cell is repeated on every output row.
            data = [df_input.at[row_start, col]] * len(df)
        elif field_type == 'date':
            data = [date_aware] * len(df)
        elif field_type == 'string':
            # Quoted literal: drop the surrounding quotes.
            data = [location[1:-1]] * len(df)

        operation = mapping.operation
        if operation:
            data = [apply_operation(item, operation) for item in data]

        df[mapping.db_field_name] = data

    return df

def apply_operation(value, operation):
    """Apply a simple arithmetic operation such as ``"*1000"`` to a value.

    Args:
        value: Cell value; anything ``float()`` accepts is treated as a
            number.
        operation: String whose first character is one of ``* / + -`` and
            whose remainder is the numeric operand. Any other first
            character (or an empty string) means "convert to float only".

    Returns:
        The computed float, or ``value`` unchanged (after printing a notice)
        when it cannot be converted to a float - e.g. ``None``, a datetime
        or non-numeric text.

    Raises:
        ValueError: If the operand part of ``operation`` is not numeric.
        ZeroDivisionError: For a division by zero.
    """
    try:
        num_value = float(value)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric cells (None, datetimes).
        print(f"Unexpected value: {value}")
        return value

    handlers = {
        '*': operator.mul,
        '/': operator.truediv,
        '+': operator.add,
        '-': operator.sub,
    }
    handler = handlers.get(operation[:1])
    if handler is None:
        return num_value
    return handler(num_value, float(operation[1:]))

def extract_datetime(date_naive, df, form_format, start_row, end_row):
    """Build the list of timezone-aware datetimes indexing the extracted rows.

    The date part comes from ``form_format.date_location`` (the literal
    ``date`` meaning ``date_naive``, a single cell, or a range of cells) and
    the optional time part from ``form_format.time_location`` (a single cell
    or a range). A single value on one side is combined with every value of
    the other side; two ranges are paired element by element. Without a time
    location, plain dates are placed at midnight.

    Args:
        date_naive: Naive datetime used when the date location is ``date``.
        df: Sheet DataFrame addressed as ``df.at[row, col]``.
        form_format: Form definition providing the date/time locations and
            formats, ``time_zone`` and ``time_first_hour_offset``.
        start_row: Row substituted for ``start`` in range locations.
        end_row: Row substituted for ``end`` in range locations.

    Returns:
        List of datetimes localized to ``form_format.time_zone`` or, when
        that is empty, to ``settings.TIME_ZONE``.

    Raises:
        ValueError: If a location is of a kind that cannot supply dates or
            times (e.g. a quoted string).
    """
    date_location = form_format.date_location
    date_format = form_format.date_index_format
    time_location = form_format.time_location
    time_format = form_format.time_index_format
    first_hour_offset = form_format.time_first_hour_offset
    time_zone = pytz.timezone(form_format.time_zone or settings.TIME_ZONE)

    def read_cells(col, first_row, last_row, cell_format):
        # For a single address first_row == last_row, so this covers both
        # the one-cell and the range case.
        return [
            extract_date_from_string(df.at[row, col], cell_format, first_hour_offset)
            for row in range(first_row, last_row + 1)
        ]

    date_type, col, first_row, last_row = extract_address_or_range(date_location, start_row, end_row)
    if date_type == 'date':
        dates = [date_naive]
    elif date_type in ('address', 'range'):
        dates = read_cells(col, first_row, last_row, date_format)
    else:
        raise ValueError(f"Unsupported date location: {date_location!r}")
    single_date = date_type != 'range'

    if time_location is None:
        # Promote plain dates to midnight datetimes; datetimes pass through.
        naive_datetimes = [
            datetime.combine(d, time()) if isinstance(d, date) and not isinstance(d, datetime) else d
            for d in dates
        ]
    else:
        time_type, col, first_row, last_row = extract_address_or_range(time_location, start_row, end_row)
        if time_type not in ('address', 'range'):
            raise ValueError(f"Unsupported time location: {time_location!r}")
        times = read_cells(col, first_row, last_row, time_format)

        if single_date:
            day = dates[0]
            if isinstance(day, datetime):
                day = day.date()
            # One date, one or many times.
            naive_datetimes = [datetime.combine(day, t) for t in times]
        elif time_type == 'address':
            # Many dates sharing one time.
            naive_datetimes = [datetime.combine(d, times[0]) for d in dates]
        else:
            naive_datetimes = [datetime.combine(d, t) for d, t in zip(dates, times)]

    return [time_zone.localize(moment) for moment in naive_datetimes]


def _hour_from_token(token):
    """Convert a matched hour such as ``"07"`` or ``"01PM"`` to a 0-23 hour."""
    digits = token.rstrip('APMapm')
    meridiem = token[len(digits):].upper()
    hour = int(digits)
    if not meridiem:
        return hour
    if meridiem not in ('AM', 'PM'):
        raise ValueError(f"Unrecognised AM/PM marker in hour {token!r}")
    # 12AM is hour 0 and 12PM is hour 12.
    return hour % 12 + (12 if meridiem == 'PM' else 0)


def _month_from_token(token):
    """Convert a matched month (number, abbreviation or full name) to 1-12."""
    try:
        return int(token)
    except ValueError:
        pass
    lowered = token.lower()
    # Try the abbreviated name first, then the full name.
    for code in ('%b', '%B'):
        try:
            return datetime.strptime(lowered, code).month
        except ValueError:
            continue
    raise ValueError(f"Unrecognised month: {token!r}")


def extract_date_from_string(date_value, date_format, first_hour_offset=0):
    """Parse a date, a time or a datetime out of a cell value.

    ``date_format`` is a regular expression in which the placeholders known
    to ``time_format_dict`` (``%yyyy%``, ``%mm%``, ``%hh%`` ...) are replaced
    by named groups; the first match found in ``date_value`` is used.

    Args:
        date_value: Cell content; non-string values are converted with
            ``str()`` before matching.
        date_format: Placeholder format described above.
        first_hour_offset: Amount subtracted from the parsed hour.

    Returns:
        ``datetime.time`` when any of year/month/day is missing,
        ``datetime.date`` when no time component was found, otherwise
        ``datetime.datetime``. Two-digit years are expanded with the same
        pivot ``strptime`` uses for ``%y`` (69-99 -> 19xx, 00-68 -> 20xx).

    Raises:
        ValueError: If the format does not match or a component is invalid.
    """
    regex_replacements = time_format_dict(date_value, True)

    # Turn the placeholder format into a regex without losing the original,
    # which is what the error message should report.
    pattern_text = date_format
    for placeholder, fragment in regex_replacements.items():
        pattern_text = pattern_text.replace(placeholder, fragment)

    text = date_value if isinstance(date_value, str) else str(date_value)
    match = re.search(pattern_text, text)
    if not match:
        raise ValueError(f"Couldn't extract a date from {date_value} using format {date_format}")

    converted = {}
    for component, raw in match.groupdict().items():
        if raw is None:
            continue
        if component == 'year':
            year = int(raw)
            if len(raw) == 2:
                year += 1900 if year >= 69 else 2000
            converted['year'] = year
        elif component in ('day', 'minute', 'second'):
            converted[component] = int(raw)
        elif component == 'hour':
            converted['hour'] = _hour_from_token(raw) - first_hour_offset
        elif component == 'month':
            converted['month'] = _month_from_token(raw)

    date_keys = ('year', 'month', 'day')
    time_keys = ('hour', 'minute', 'second')

    if any(key not in converted for key in date_keys):
        # One or more date components are missing: return a time.
        return time(**{key: converted[key] for key in time_keys if key in converted})
    if not any(key in converted for key in time_keys):
        # Only date components are present.
        return date(**{key: converted[key] for key in date_keys})
    # Both date and time components are present.
    return datetime(**converted)

def extract_address_or_range(location, start, end):
    """Classify a location string and resolve it to a column and row span.

    Recognised forms:
        * ``date`` - the literal word.
        * ``'text'`` or ``"text"`` - a quoted string constant.
        * ``B12`` - a cell address (upper-case column letters, then digits).
        * ``A[first,last]`` - a column with a row range, where ``first`` may
          be ``start`` and ``last`` may be ``end``; whitespace around the
          bounds is ignored, so ``A[start, end]`` works too.

    Args:
        location: The location string.
        start: Row used for the ``start`` keyword.
        end: Row used for the ``end`` keyword.

    Returns:
        ``(kind, col, first_row, last_row)`` with ``kind`` being ``'date'``,
        ``'string'``, ``'address'`` or ``'range'``. Column and rows are
        ``None`` for ``'date'`` and ``'string'``; for ``'address'`` both rows
        are equal.

    Raises:
        ValueError: If the location matches none of the forms or a range
            bound is not a number.
    """
    if location == 'date':
        return 'date', None, None, None

    for quote in ("'", '"'):
        if location.startswith(quote) and location.endswith(quote):
            return 'string', None, None, None

    match = re.match(r"([A-Z]+)(\d+)", location)
    if match:
        col, row_text = match.groups()
        row = int(row_text)
        return 'address', col, row, row  # single cell: first and last row coincide

    # Remaining form: COLUMN[first,last]
    col, bracket, remainder = location.partition('[')
    bounds = [part.strip() for part in remainder.split(']')[0].split(',')]
    if not bracket or len(bounds) < 2:
        raise ValueError(f"Unrecognised location: {location!r}")

    start_row = start if bounds[0] == "start" else int(bounds[0])
    end_row = end if bounds[1] == "end" else int(bounds[1])
    return 'range', col.strip(), start_row, end_row

def parse_search_value(search_value):
    """
    Parses a search value like [A='1',B='08BEL-115'] into a dictionary.

    The first and last character (the brackets) are dropped and the rest is
    split on commas into ``column=value`` conditions. Whitespace around the
    column and the value is ignored, surrounding single quotes are removed
    from the value, and only the first ``=`` separates the two, so values
    may contain ``=``. Values cannot contain commas.

    Returns a dict mapping column label to the expected string.
    Raises ValueError if a condition has no ``=``.
    """
    parsed = {}
    for pair in search_value[1:-1].split(','):
        column, separator, raw_value = pair.partition('=')
        if not separator:
            raise ValueError(f"Malformed search condition {pair!r} in {search_value!r}")
        # Trim whitespace first so the quotes are really at the ends.
        parsed[column.strip()] = raw_value.strip().strip("'")
    return parsed
def col_label_to_index(col_label):
    """Convert an Excel column label to a zero-based index."""
    # Read the label as a base-26 number whose digits run from A=1 to Z=26,
    # least significant letter last, then shift to zero-based.
    one_based = sum(
        (ord(letter.upper()) - ord('A') + 1) * 26 ** position
        for position, letter in enumerate(reversed(col_label))
    )
    return one_based - 1

def index_to_col_label(n):
    """Convert a column number into an Excel column name."""
    # Letters are produced from the least significant end; values of n that
    # are not positive yield an empty name.
    letters = []
    remaining = n
    while remaining > 0:
        remaining -= 1
        letters.append(chr(65 + remaining % 26))
        remaining //= 26
    return ''.join(reversed(letters))

def find_row_by_search(df, search_value, start_row=1):
    """
    Finds the first row, at or after position start_row, in which every
    column named in search_value holds the value given for it.

    search_value uses the syntax understood by parse_search_value, e.g.
    [A='1',B='08BEL-115']. Returns the index label of the matching row, or
    None when no row matches.
    """
    conditions = parse_search_value(search_value)

    for label, cells in df.iloc[start_row:].iterrows():
        for col, expected in conditions.items():
            if not (cells[col] == expected):
                break  # this row fails one condition; try the next row
        else:
            return label
    return None

def time_format_dict(date_value, string_format=False):
    """Return the placeholder table used for date/time templates.

    Args:
        date_value: Date/datetime whose components are rendered when
            ``string_format`` is false; not used otherwise.
        string_format: When true, map each placeholder to a regex fragment
            with a named group (``year``, ``month``, ``day``, ``hour``,
            ``minute``, ``second``) instead of to rendered text.

    Returns:
        A dict keyed by placeholder (``%yyyy%``, ``%mm%``, ``%hhap%`` ...).
    """
    if not string_format:
        render = date_value.strftime
        return dict((
            ('%yyyy%', render('%Y')),
            ('%yy%', render('%y')),
            ('%mmmm%', render('%B')),
            ('%mmm%', render('%b')),
            ('%mm%', render('%m')),
            ('%m%', str(date_value.month)),
            ('%dd%', render('%d')),
            ('%d%', str(date_value.day)),
            ('%hh%', render('%H')),
            ('%hhap%', render('%I%p')),  # 12-hour clock with AM/PM, e.g. 01PM
            ('%h%', str(date_value.hour)),
            ('%nn%', render('%M')),
            ('%n%', str(date_value.minute)),
            ('%ss%', render('%S')),
            ('%s%', str(date_value.second)),
        ))

    return dict((
        ('%yyyy%', r'(?P<year>\d{4})'),
        # Two-digit year: only the last two digits are captured, so the
        # consumer has to deal with the century.
        ('%yy%', r'(?P<year>\d{2})'),
        ('%mmmm%', r'(?P<month>\w+)'),
        ('%mmm%', r'(?P<month>\w{3})'),
        ('%mm%', r'(?P<month>\d{2})'),
        ('%m%', r'(?P<month>\d{1,2})'),
        ('%dd%', r'(?P<day>\d{2})'),
        ('%d%', r'(?P<day>\d{1,2})'),
        ('%hh%', r'(?P<hour>\d{2})'),
        ('%hhap%', r'(?P<hour>\d{2}[APMapm]{2})'),
        ('%h%', r'(?P<hour>\d{1,2})'),
        ('%nn%', r'(?P<minute>\d{2})'),
        ('%n%', r'(?P<minute>\d{1,2})'),
        ('%ss%', r'(?P<second>\d{2})'),
        ('%s%', r'(?P<second>\d{1,2})'),
    ))

def format_path(path_template, date_naive):
    """Fill the date placeholders of a path template.

    Every placeholder returned by ``time_format_dict(date_naive)`` (such as
    ``%yyyy%`` or ``%mm%``) that occurs in ``path_template`` is replaced by
    the corresponding component of ``date_naive``.

    Returns the resulting path string.
    """
    resolved = path_template
    for token, text in time_format_dict(date_naive).items():
        if token in resolved:
            resolved = resolved.replace(token, text)
    return resolved