import json
import random
from collections import defaultdict
from pathlib import Path
from typing import Dict, Any, List, Optional

from metrics_lempel_ziv import compute_lempel_ziv_metrics


# Name of the directory (next to this script) that holds the preprocessed telemetry.
PROCESSED_DIR_NAME = "processed"
# Preferred input file: the cleaned merged telemetry.
DEFAULT_INPUT_FILE = "telemetry_useful__grouped_ordered_filtered_merged_cleaned.json"
# Input file used when the cleaned one does not exist.
FALLBACK_INPUT_FILE = "telemetry_useful__grouped_ordered_filtered_merged.json"
# Sub-directory of the processed directory that receives one JSON file per player.
OUTPUT_DIR_NAME = "metrics_per_player"

# Window size (in seconds) for per-level Lempel–Ziv metrics
LZ_WINDOW_SECONDS = 30.0


def _choose_input_file(processed_dir: Path) -> Path:
    """
    Select the telemetry file to read from `processed_dir`.

    The cleaned merged file (the one carrying actual_timestamp) wins when it
    is present on disk. Otherwise the path of the plain merged file is
    returned; that fallback path is not checked for existence here.
    """
    preferred = processed_dir / DEFAULT_INPUT_FILE
    return preferred if preferred.exists() else processed_dir / FALLBACK_INPUT_FILE


def _load_grouped_events(input_path: Path) -> Dict[str, List[Dict[str, Any]]]:
    """
    Read the grouped telemetry JSON stored at `input_path`.

    Returns the decoded top-level JSON object unchanged.

    Raises:
        FileNotFoundError: if `input_path` does not exist.
        ValueError: if the decoded document is not a JSON object (dict).
    """
    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    payload = json.loads(input_path.read_text(encoding="utf-8"))

    if isinstance(payload, dict):
        return payload
    raise ValueError("Expected top-level JSON object mapping user_id to events list")


def _extract_quiz_answers(events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collect the values of all "QuizAnswer" events, in event order.

    The first four answers are labelled, by position, as tech_issues,
    severe_issues, challenge and continue. Any further answer is kept as a
    dict holding only its value, with no identifier.
    """
    labels = ("tech_issues", "severe_issues", "challenge", "continue")

    quiz_values = [
        ev.get("value") for ev in events if ev.get("actionName") == "QuizAnswer"
    ]

    collected: List[Dict[str, Any]] = []
    for position, value in enumerate(quiz_values):
        if position >= len(labels):
            # Beyond the known questions: keep the value, but unlabelled.
            collected.append({"value": value})
        else:
            collected.append({"identifier": labels[position], "value": value})
    return collected


def _count_scene_transitions(events: List[Dict[str, Any]]) -> int:
    """Return how many events have actionName equal to "SCENE_TRANSITION"."""
    transitions = 0
    for ev in events:
        if ev.get("actionName") == "SCENE_TRANSITION":
            transitions += 1
    return transitions


def _get_event_timestamp(ev: Dict[str, Any]) -> Optional[float]:
    """
    Return the event's timestamp as a float, or None when it has none.

    Fields are tried in priority order and the first one that is not None
    wins:
    - actual_timestamp (if present, from cleaned pipeline)
    - timestamp_game
    - time
    """
    for field in ("actual_timestamp", "timestamp_game", "time"):
        raw = ev.get(field)
        if raw is not None:
            return float(raw)
    return None


def _compute_average_apm(events: List[Dict[str, Any]]) -> float:
    """
    Compute average actions per minute (APM) for a player's session.

    Uses the same subset of events as the global LZ metrics: only player
    actions (non-null actionName), excluding actions of type SYSTEM and Menu.
    Events without a usable timestamp are left out.

    Returns 0.0 when fewer than two timestamped actions remain or when they
    span no positive amount of time.
    """

    def _is_player_action(ev: Dict[str, Any]) -> bool:
        # Same filter as the global LZ dataset: a named action that is
        # neither SYSTEM nor Menu.
        name = ev.get("actionName")
        kind = ev.get("actionType")
        return name is not None and kind not in ("SYSTEM", "Menu")

    stamps: List[float] = [
        ts
        for ts in (_get_event_timestamp(ev) for ev in events if _is_player_action(ev))
        if ts is not None
    ]

    if len(stamps) < 2:
        return 0.0

    elapsed_seconds = max(stamps) - min(stamps)
    if elapsed_seconds <= 0:
        return 0.0

    return len(stamps) / (elapsed_seconds / 60.0)


def _compute_windowed_lz_metrics(
    events: List[Dict[str, Any]], window_seconds: float
) -> List[Dict[str, Any]]:
    """
    Compute Lempel–Ziv metrics in fixed time windows for each level.

    Levels are inferred from SCENE_TRANSITION events:
    - The first SCENE_TRANSITION marks entry into level 1.
    - Subsequent SCENE_TRANSITION events increment the level counter.
    - SYSTEM events and events before the first transition are ignored.
    - Events without a usable timestamp are ignored.

    Windows:
    - Fixed width `window_seconds`, measured from the timestamp of the first
      timed event of the level. Events stamped earlier than that are placed
      in window 0.
    - The level ends at the latest timestamp seen in the level, so the last
      window never has a negative duration even if events are not perfectly
      ordered in time.
    - If the last window for a level is shorter than window_seconds / 2,
      it is merged into the previous window.
    - Levels whose events span no positive amount of time are skipped.

    Within a window, Menu-type events and events without an actionName are
    excluded from the action sequence and frequencies, but their eta values
    still contribute to eta_start / eta_end. Windows left with no actions
    are skipped.

    Args:
        events: one player's events, in playback order.
        window_seconds: window width in seconds; a non-positive value
            yields an empty result.

    Returns:
        One dict per window, ordered by level then window index, with keys
        level, window_index, start_offset_seconds, end_offset_seconds,
        action_count, apm, eta_start, eta_end, delta_eta,
        action_name_frequencies, lz_phrase_count, lz_average_phrase_length
        and lz_normalized_complexity.
    """
    if window_seconds <= 0.0:
        return []

    def _parse_eta(value: Any) -> Optional[float]:
        """
        Return eta as a float when it is a meaningful number, else None.

        Rejected: None, non-numeric values, negative values (e.g. a -1
        sentinel), values above 1e6 (e.g. overflow sentinels) and NaN.
        """
        if value is None:
            return None
        try:
            parsed = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        # A chained comparison is False for NaN, so NaN is rejected too.
        if 0.0 <= parsed <= 1e6:
            return parsed
        return None

    # Assign events to levels, skipping SYSTEM actions and pre-level (level 0)
    level_events: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    current_level = 0
    for ev in events:
        if ev.get("actionName") == "SCENE_TRANSITION":
            current_level += 1
            continue
        if ev.get("actionType") == "SYSTEM":
            continue
        if current_level > 0:
            level_events[current_level].append(ev)

    window_width = float(window_seconds)
    windowed_metrics: List[Dict[str, Any]] = []

    for level_idx in sorted(level_events):
        # Pair every event with its timestamp; untimed events are dropped.
        timed_events = []
        for ev in level_events[level_idx]:
            ts = _get_event_timestamp(ev)
            if ts is not None:
                timed_events.append((ev, ts))

        if not timed_events:
            continue

        level_start_ts = timed_events[0][1]
        # Use the latest timestamp rather than the last event's timestamp so
        # that an out-of-order event cannot fall beyond the level's end.
        level_end_ts = max(ts for _, ts in timed_events)
        level_duration = level_end_ts - level_start_ts
        if level_duration <= 0.0:
            continue

        # Group events into fixed windows based on offset from level_start_ts
        windows: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
        for ev, ts in timed_events:
            offset = ts - level_start_ts
            if offset < 0.0:
                # Event stamped before the level's first event: keep it in
                # the first window instead of producing a negative index.
                offset = 0.0
            windows[int(offset // window_width)].append(ev)

        max_idx = max(windows)

        # Compute actual duration of the last window and apply merge rule
        tail_duration = level_duration - max_idx * window_width
        if tail_duration < (window_width / 2.0) and max_idx > 0:
            # Merge last window into previous one (created on demand if no
            # event fell into it).
            windows[max_idx - 1].extend(windows[max_idx])
            del windows[max_idx]
            max_idx -= 1

        for window_idx in sorted(windows):
            actions: List[str] = []
            eta_values: List[float] = []
            action_name_counts: Dict[str, int] = defaultdict(int)

            for ev in windows[window_idx]:
                action_name = ev.get("actionName")
                # Exclude menu verbs from action sequence and frequencies
                if action_name is not None and ev.get("actionType") != "Menu":
                    actions.append(action_name)
                    action_name_counts[action_name] += 1

                eta = _parse_eta(ev.get("eta"))
                if eta is not None:
                    eta_values.append(eta)

            if not actions:
                continue

            lz = compute_lempel_ziv_metrics(actions)

            # The last window ends where the level ends; all others are
            # exactly one window wide.
            start_offset = window_idx * window_width
            if window_idx == max_idx:
                end_offset = level_duration
            else:
                end_offset = (window_idx + 1) * window_width

            window_duration = end_offset - start_offset
            action_count = len(actions)
            apm = (action_count / window_duration * 60.0) if window_duration > 0 else 0.0

            # Per-window eta dynamics
            if eta_values:
                eta_start = eta_values[0]
                eta_end = eta_values[-1]
                delta_eta = eta_end - eta_start
            else:
                eta_start = None
                eta_end = None
                delta_eta = None

            # Normalized per-action-name frequencies within the window
            # (action_count is at least 1 here).
            action_name_frequencies = {
                name: count / float(action_count)
                for name, count in action_name_counts.items()
            }

            windowed_metrics.append(
                {
                    "level": level_idx,
                    "window_index": window_idx,
                    "start_offset_seconds": start_offset,
                    "end_offset_seconds": end_offset,
                    "action_count": action_count,
                    "apm": apm,
                    "eta_start": eta_start,
                    "eta_end": eta_end,
                    "delta_eta": delta_eta,
                    "action_name_frequencies": action_name_frequencies,
                    "lz_phrase_count": lz["lz_phrase_count"],
                    "lz_average_phrase_length": lz["lz_average_phrase_length"],
                    "lz_normalized_complexity": lz["lz_normalized_complexity"],
                }
            )

    return windowed_metrics


def _generate_player_names(user_ids: List[str]) -> Dict[str, str]:
    """
    Build a reproducible user_id -> display-name mapping.

    Each name is a base name drawn from a fixed pool by a random generator
    seeded with 42, followed by a dot and a 1-based running index, e.g.
    'Alex.1', 'Jordan.2', ... The ids are processed in sorted order so the
    numbering is stable across runs.
    """
    name_pool = (
        "Alex", "Sam", "Jordan", "Taylor", "Riley",
        "Casey", "Jamie", "Morgan", "Drew", "Quinn",
        "Avery", "Cameron", "Logan", "Parker", "Reese",
        "Harper", "Skyler", "Rowan", "Emerson", "Elliot",
    )

    picker = random.Random(42)
    ordered_ids = sorted(user_ids)

    # One draw per id, in sorted-id order, so the sequence of draws is fixed.
    return {
        uid: f"{picker.choice(name_pool)}.{number}"
        for number, uid in enumerate(ordered_ids, start=1)
    }


def calculate_metrics() -> Path:
    """
    Calculate metrics per player based on merged preprocessed telemetry and
    write one JSON file per player.

    Input is read from the processed directory next to this script (cleaned
    merged file preferred, plain merged file as fallback). Each output file
    is named after the player's generated name and contains:

    - player_name: randomized plausible name with index suffix
    - original_user_id: original unique identifier from telemetry
    - quiz_answers: list of {"identifier", "value"} dicts (answers beyond
      the fourth carry only "value")
    - level_reached: number of SCENE_TRANSITION events
    - average_apm: average actions per minute over the session
    - lz_phrase_count, lz_average_phrase_length, lz_normalized_complexity:
      Lempel–Ziv metrics over the whole player action sequence
    - lz_window_metrics: per-level, per-window metrics

    Players who answered "Yes" to both the tech_issues and severe_issues
    quiz questions are excluded: no file is written for them.

    Returns:
        The directory the per-player files were written to.

    Raises:
        FileNotFoundError: if the chosen input file does not exist.
        ValueError: if the input is not a JSON object or contains no players.
    """
    script_dir = Path(__file__).parent
    processed_dir = script_dir / PROCESSED_DIR_NAME
    input_path = _choose_input_file(processed_dir)

    grouped = _load_grouped_events(input_path)
    user_ids = list(grouped.keys())

    if not user_ids:
        raise ValueError("No players found in input data")

    # Names are generated for every player, including excluded ones, so that
    # the index suffixes do not depend on who gets filtered out.
    name_mapping = _generate_player_names(user_ids)

    output_dir = processed_dir / OUTPUT_DIR_NAME
    output_dir.mkdir(parents=True, exist_ok=True)

    for user_id, events in grouped.items():
        player_name = name_mapping[user_id]

        quiz_answers = _extract_quiz_answers(events)

        # Exclude players who answered "Yes" to both tech_issues and
        # severe_issues. This is checked before the metric computations so
        # no work is spent on players that are not saved.
        answer_by_identifier = {
            ans["identifier"]: ans.get("value")
            for ans in quiz_answers
            if "identifier" in ans
        }
        if (
            answer_by_identifier.get("tech_issues") == "Yes"
            and answer_by_identifier.get("severe_issues") == "Yes"
        ):
            # Skip saving this player's file
            continue

        level_reached = _count_scene_transitions(events)
        average_apm = _compute_average_apm(events)

        # Action sequence for Lempel–Ziv metrics (only player actions),
        # exclude SYSTEM actions and menu open/close (actionType == "Menu").
        action_sequence = [
            ev.get("actionName")
            for ev in events
            if ev.get("actionName") is not None
            and ev.get("actionType") not in ("SYSTEM", "Menu")
        ]
        lz_metrics = compute_lempel_ziv_metrics(action_sequence)
        lz_window_metrics = _compute_windowed_lz_metrics(events, LZ_WINDOW_SECONDS)

        metrics: Dict[str, Any] = {
            "player_name": player_name,
            "original_user_id": user_id,
            "quiz_answers": quiz_answers,
            "level_reached": level_reached,
            "average_apm": average_apm,
            "lz_phrase_count": lz_metrics["lz_phrase_count"],
            "lz_average_phrase_length": lz_metrics["lz_average_phrase_length"],
            "lz_normalized_complexity": lz_metrics["lz_normalized_complexity"],
            "lz_window_metrics": lz_window_metrics,
        }

        output_path = output_dir / f"{player_name}.json"
        with output_path.open("w", encoding="utf-8") as f:
            json.dump(metrics, f, indent=2)

    print(f"Metrics per player saved under: {output_dir}")
    return output_dir


# Run the per-player metrics computation when this file is executed as a script.
if __name__ == "__main__":
    calculate_metrics()


