from __future__ import annotations

import re
from datetime import datetime, timezone, timedelta
from typing import Optional, Dict, Any, List

import numpy as np
import pandas as pd

# Time-zone support via the standard-library `zoneinfo` module (Python 3.9+).
# NOTE: `_ZONEINFO` only records that the module could be imported. The IANA
# database itself may still be missing (e.g. on Windows without the `tzdata`
# package); that only shows up later, when a specific zone is looked up.
try:
    from zoneinfo import ZoneInfo
except Exception:  # pragma: no cover
    # Keep the name defined so module-level references do not fail.
    ZoneInfo = None  # type: ignore
    _ZONEINFO = False
else:
    _ZONEINFO = True

# Header timestamp, e.g. "2025/10/13 19:51:09" (group 1 is the whole stamp).
_TIME_RE = re.compile(r"(\d{4}/\d{2}/\d{2}\s+\d{2}:\d{2}:\d{2})")
# Unsigned decimal number with an optional exponent, e.g. "0.010000", "5",
# ".5" or "1.000000e-02". Without the exponent part a value written in
# scientific notation would be truncated to its mantissa ("1.000000").
_FLOAT_RE = re.compile(r"([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)")


def _parse_t0_dt(path: str) -> tuple[datetime, float]:
    """Read the two header lines of *path* and return ``(t0_utc, dt_sec)``.

    Expected header:
      Line 1: t0  YYYY/MM/DD HH:MM:SS  (local time at Cuneo -> Europe/Rome)
      Line 2: dt  0.010000

    The timestamp is interpreted as Europe/Rome wall-clock time and converted
    to UTC. If the zone cannot be resolved (``zoneinfo`` is not importable, or
    it is importable but no time-zone database is installed), the timestamp
    is labelled as UTC unchanged and a ``RuntimeWarning`` is emitted, because
    the result is then off by the local UTC offset.

    Returns:
        ``(t0_utc, dt_sec)``: a timezone-aware UTC ``datetime`` and the number
        parsed from the ``dt`` line as a float.

    Raises:
        ValueError: if the timestamp or the ``dt`` value cannot be found.
        OSError: if the file cannot be opened.
    """
    # Undecodable bytes are dropped; only the ASCII header content matters here.
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        line1 = f.readline().strip()
        line2 = f.readline().strip()

    m = _TIME_RE.search(line1)
    if not m:
        raise ValueError(f"Could not parse t0 from: {line1!r}")

    naive_local = datetime.strptime(m.group(1), "%Y/%m/%d %H:%M:%S")

    # `_ZONEINFO` only says the module imported. The zone lookup itself can
    # still fail when no tz database is present (ZoneInfoNotFoundError is a
    # KeyError subclass), so resolve the zone defensively.
    rome = None
    if _ZONEINFO:
        try:
            rome = ZoneInfo("Europe/Rome")
        except KeyError:
            rome = None

    if rome is not None:
        t0_utc = naive_local.replace(tzinfo=rome).astimezone(timezone.utc)
    else:
        # Best-effort fallback: keep the wall-clock value and label it UTC.
        import warnings

        warnings.warn(
            "Europe/Rome time zone data is unavailable; treating t0 as UTC "
            "(install the 'tzdata' package for a correct conversion).",
            RuntimeWarning,
            stacklevel=2,
        )
        t0_utc = naive_local.replace(tzinfo=timezone.utc)

    m2 = _FLOAT_RE.search(line2)
    if not m2:
        raise ValueError(f"Could not parse dt from: {line2!r}")
    dt_sec = float(m2.group(1))
    return t0_utc, dt_sec


def parse_txt_file(path: str, sampling_hz: int = 100) -> Dict[str, Any]:
    """Load a Vicoforte-like TXT recording with any number of sensor channels.

    File layout:
      1: t0  2025/10/13 19:51:09
      2: dt  0.010000
      3: tempo  <N channel headers>
      4: (blank)
      5+: data rows

    Args:
        path: location of the TXT file.
        sampling_hz: rate returned only when neither the time column nor the
            ``dt`` header yields a usable value.

    Returns:
        A dict with the keys
          - start_time: timezone-aware UTC datetime taken from line 1
          - end_time: start_time plus the last retained time value
          - data: np.ndarray of shape (N, M), float32, with trailing all-zero
            rows removed
          - sampling_hz: int, from the time column when its spacing is
            uniform, otherwise from ``dt``, otherwise the argument
          - t_rel: np.ndarray (N,), float64, the file's time column
          - sensor_cols: List[str], names of the M channel columns

    Raises:
        ValueError: if no time column or no sensor column is present (or the
            header cannot be parsed).
    """
    t0_utc, dt_sec = _parse_t0_dt(path)

    # The first two lines are the 't0' / 'dt' header; the blank line after the
    # column header is dropped by skip_blank_lines.
    df = pd.read_csv(
        path,
        sep=r"\s+",
        engine="python",
        skiprows=2,
        skip_blank_lines=True,
    )

    time_aliases = {"time", "_time", "t"}

    def looks_like_time(label) -> bool:
        key = str(label).strip().lower()
        return key.startswith("tempo") or key in time_aliases

    # First matching column wins.
    time_col: Optional[str] = next(
        (col for col in df.columns if looks_like_time(col)), None
    )
    if time_col is None:
        raise ValueError("Time column not found (expected 'tempo' or similar).")

    sensor_cols: List[str] = [str(col) for col in df.columns if col != time_col]
    if not sensor_cols:
        raise ValueError("No sensor columns found (expected at least 1 column besides time).")

    time_series = df[time_col].astype("float64")
    t_rel = time_series.to_numpy()
    arr = df[sensor_cols].to_numpy(dtype=np.float32, copy=False)

    # Keep everything up to and including the last row that has any non-zero
    # sample; an empty or all-zero recording keeps nothing.
    keep = 0
    if arr.size:
        live_rows = np.flatnonzero((arr != 0.0).any(axis=1))
        if live_rows.size:
            keep = int(live_rows[-1]) + 1
    arr = arr[:keep]
    t_rel = t_rel[:keep]

    # Use the time column's own step when it is positive and uniform over the
    # first (at most) 5000 samples.
    step = None
    if t_rel.size >= 3:
        deltas = np.diff(t_rel[:5000])
        first_delta = deltas[0]
        uniform = np.allclose(deltas, first_delta, rtol=1e-4, atol=1e-6)
        if uniform and first_delta > 0:
            step = float(first_delta)

    if step is not None:
        sampling_hz = int(round(1.0 / step))
    elif dt_sec > 0:
        sampling_hz = int(round(1.0 / dt_sec))
    # otherwise the caller-supplied sampling_hz is kept as is

    if t_rel.size > 0:
        duration_sec = float(t_rel[-1])
    else:
        duration_sec = 0.0
    end_time_utc = t0_utc + timedelta(seconds=duration_sec)

    result: Dict[str, Any] = {
        "start_time": t0_utc,
        "end_time": end_time_utc,
        "data": arr,
        "sampling_hz": sampling_hz,
        "t_rel": t_rel,
        "sensor_cols": sensor_cols,
    }
    return result
