import math
from typing import Dict, List, Tuple

from lempel_ziv_complexity import lempel_ziv_complexity


def _encode_actions_to_string(actions: List[str]) -> (str, int):
    """
    Encode a list of action identifiers into a compact string where each
    distinct action is mapped to a single Unicode character.

    Returns:
        encoded (str): encoded sequence
        alphabet_size (int): number of distinct symbols (|Σ|)
    """
    if not actions:
        return "", 0

    symbol_map: Dict[str, str] = {}
    next_code_point = 33  # start from '!' to avoid control characters

    encoded_chars: List[str] = []
    for action in actions:
        if action not in symbol_map:
            symbol_map[action] = chr(next_code_point)
            next_code_point += 1
        encoded_chars.append(symbol_map[action])

    encoded = "".join(encoded_chars)
    alphabet_size = len(symbol_map)
    return encoded, alphabet_size


def compute_lempel_ziv_metrics(actions: List[str]) -> Dict[str, float]:
    """
    Derive Lempel–Ziv complexity metrics from a sequence of actions.

    The actions are first encoded to a one-character-per-action string via
    ``_encode_actions_to_string``; the phrase count is then whatever the
    imported ``lempel_ziv_complexity`` reports for that string.

    Returned keys:
        - lz_phrase_count: c(n), the number of phrases
        - lz_average_phrase_length: n / c(n)
        - lz_normalized_complexity: c(n) * log_k(n) / n, with k the
          alphabet size (|Σ|) and n the sequence length

    Special cases:
        - empty input: all three metrics are 0.0
        - a single distinct action: the library is not called; the result
          is fixed to one phrase of length n with normalized complexity 0.0
        - a non-positive phrase count from the library: all metrics are 0.0
    """

    def _as_metrics(count: float, mean_length: float, complexity: float) -> Dict[str, float]:
        # Single place that fixes the key names and their order.
        return {
            "lz_phrase_count": count,
            "lz_average_phrase_length": mean_length,
            "lz_normalized_complexity": complexity,
        }

    encoded, k = _encode_actions_to_string(actions)
    length = len(encoded)

    if length == 0:
        return _as_metrics(0.0, 0.0, 0.0)

    # With fewer than two symbols log_k is undefined (ln(1) == 0), so the
    # constant sequence is given a normalized complexity of 0 by definition.
    if k < 2:
        return _as_metrics(1.0, float(length), 0.0)

    phrases = float(lempel_ziv_complexity(encoded))
    if phrases <= 0.0:
        return _as_metrics(0.0, 0.0, 0.0)

    mean_phrase_length = float(length) / phrases

    # Change of base: log_k(n) = ln(n) / ln(k)
    log_base_k = math.log(float(length)) / math.log(float(k))
    complexity = (phrases * log_base_k) / float(length)

    return _as_metrics(phrases, mean_phrase_length, complexity)


# Public API: only the metrics entry point is exported; the underscore-prefixed
# encoder helper is left out of star-imports.
__all__ = ["compute_lempel_ziv_metrics"]


