import json
from pathlib import Path
from collections import defaultdict

# File name of the preprocessed events, read from the "processed" directory
# that sits next to this script.
INPUT_FILE = "json_preprocess_usefuldata.json"
# File name of the grouped result, written into the same "processed" directory.
OUTPUT_FILE = "json_preprocess_usefuldata_grouped.json"


def group_by_user_singlefile(input_path=None, output_path=None) -> Path:
    """Group preprocessed events by user and write them to one JSON file.

    The input is expected to be a JSON array of event objects. Each event is
    filed under the first truthy value among its ``"user_id"`` and
    ``"distinct_id"`` fields, or under ``"UNKNOWN"`` when both are missing or
    falsy. The output is a JSON object mapping each id to the list of its
    events, kept in input order.

    Numeric and boolean ids are converted to the string JSON would use for
    them as an object key, so that e.g. ``1`` and ``"1"`` end up in the same
    group instead of producing two identical keys in the output file.

    Args:
        input_path: Path of the events file to read. Defaults to
            ``processed/<INPUT_FILE>`` next to this script.
        output_path: Path of the grouped file to write. Defaults to
            ``processed/<OUTPUT_FILE>`` next to this script. Its parent
            directory is created if it does not exist.

    Returns:
        The path of the file that was written.

    Raises:
        FileNotFoundError: If the input file does not exist.
    """
    processed_dir = Path(__file__).parent / "processed"

    # The module-level defaults are only looked up when no explicit path is
    # given, so callers can point the function at arbitrary files.
    input_file = processed_dir / INPUT_FILE if input_path is None else Path(input_path)
    output_file = processed_dir / OUTPUT_FILE if output_path is None else Path(output_path)

    # Make sure the destination directory exists before doing anything else.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    if not input_file.exists():
        raise FileNotFoundError(f"File non trovato: {input_file}")

    # Load the preprocessed events.
    with input_file.open("r", encoding="utf-8") as f:
        events = json.load(f)

    # Group events by user id.
    grouped = defaultdict(list)

    for ev in events:
        # Prefer user_id, then distinct_id, then the "UNKNOWN" bucket.
        user_id = ev.get("user_id") or ev.get("distinct_id") or "UNKNOWN"

        # JSON object keys are always strings: json.dump would turn 1 into
        # "1", which collides with a real "1" key and yields duplicate keys
        # in the file. Apply the same conversion here so those events merge.
        if isinstance(user_id, (int, float)):
            user_id = json.dumps(user_id)

        grouped[user_id].append(ev)

    # Convert the defaultdict to a plain dict for the final JSON document.
    grouped_dict = dict(grouped)

    # Write the single grouped file.
    with output_file.open("w", encoding="utf-8") as f:
        json.dump(grouped_dict, f, indent=2)

    print(f"File raggruppato salvato come: {output_file}")
    return output_file


# When executed as a script (rather than imported), run the grouping step.
if __name__ == "__main__":
    group_by_user_singlefile()