from imapclient import IMAPClient
import mailparser
import argparse
import textwrap
import subprocess
import tempfile
import sys
import io
import os
import base64
from dotenv import load_dotenv
import hashlib
import json

# -------------------- Config --------------------
# The .env file is looked up one directory above the one holding this script.
dotenv_path = os.path.join(os.path.dirname(__file__), os.pardir, ".env")
load_dotenv(dotenv_path)

# IMAP connection settings, read from the environment once .env is loaded.
# Each value is None when its variable is not set.
HOST, USERNAME, PASSWORD = (
    os.getenv(variable) for variable in ("IMAP_HOST", "IMAP_USER", "IMAP_PASS")
)

# CA certificate file handed to OpenSSL (-CAfile) when verifying .p7m files.
PEC_CA_CERT = "AgIDCA1_20210921.pem"

# Destination folders for extracted PDFs and their JSON metadata. Both are
# relative paths, so they resolve against the current working directory.
OUTPUT_DIR = "../pdfs/files"
FORENSIC_FOLDER = "../pdfs/forensic_copy"
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(FORENSIC_FOLDER, exist_ok=True)

# -------------------- Helper Functions --------------------
def compute_hashes(data):
    """Return hex digests of ``data`` for forensic integrity records.

    Args:
        data: Bytes-like content to fingerprint.

    Returns:
        A dict with the keys ``"sha256"`` and ``"md5"`` (in that order), each
        mapped to the lowercase hexadecimal digest of ``data``.
    """
    digests = {}
    for algorithm in ("sha256", "md5"):
        digests[algorithm] = hashlib.new(algorithm, data).hexdigest()
    return digests

def _safe_filename(name):
    """Flatten an email-supplied name into a single, safe path component."""
    # Attachment names come from the message and are untrusted. Dropping NULs
    # and turning both kinds of path separator into "_" keeps names such as
    # "../x.pdf" or "/etc/x.pdf" from pointing outside the target directory.
    flat = name.replace("\x00", "").replace("\\", "_").replace("/", "_")
    # "", "." and ".." cannot be used as a file name.
    return flat if flat not in ("", ".", "..") else "unnamed"


def _attachment_content_type(mail, filename):
    """Return the content type recorded for the attachment named ``filename``."""
    attachments = mail.attachments or []
    for entry in attachments:
        if entry.get("filename") == filename:
            return entry.get("mail_content_type", "")
    # No name match (e.g. the caller synthesised a name): only fall back to
    # the sole attachment, where the choice is unambiguous. Otherwise record
    # nothing rather than some other attachment's type.
    if len(attachments) == 1:
        return attachments[0].get("mail_content_type", "")
    return ""


def save_pdf(filename, data, mail=None, uid=None, prefix=""):
    """Save PDF attachment and create forensic JSON metadata.

    The PDF is written to ``OUTPUT_DIR`` and, when ``mail`` is given, a JSON
    metadata file is written to ``FORENSIC_FOLDER``. Both files are made
    read-only after writing. Nothing is written if either target already
    exists, so existing evidence is never overwritten.

    Args:
        filename: Attachment file name as found in the message (untrusted).
        data: PDF content, either raw bytes or a base64-encoded string.
        mail: Parsed message the attachment came from; enables the metadata
            file. Expected to expose ``attachments``, ``subject``, ``from_``
            and ``headers``.
        uid: Message UID recorded in the metadata.
        prefix: Text prepended to the saved file name (untrusted as well when
            derived from a nested attachment's name).

    Returns:
        None.
    """
    # Sanitise prefix + name together so neither can inject a directory part.
    saved_name = _safe_filename(prefix + filename)
    path = os.path.join(OUTPUT_DIR, saved_name)
    forensic_json_path = os.path.join(FORENSIC_FOLDER, f"{saved_name}.json")

    # --- Check if file already exists ---
    if os.path.exists(path) or os.path.exists(forensic_json_path):
        print(f"[*] File or metadata already exists, skipping download: {path}")
        print("[!] If you want to force a fresh download, remove the existing file(s) first.")
        print("[!] Be careful in a forensic context: altering or deleting existing evidence may compromise integrity.")
        return

    # Decode base64 if needed
    if isinstance(data, str):
        try:
            data = base64.b64decode(data)
        except ValueError as e:  # binascii.Error is a ValueError subclass
            print(f"[!] Could not decode {filename}: {e}")
            return

    # --- Save PDF ---
    with open(path, "wb") as f:
        f.write(data)
    print(f"[+] Saved PDF: {path}")
    os.chmod(path, 0o444)  # read-only for everyone

    # Forensic metadata
    if mail:
        meta = {
            "uid": uid,
            # Keep the name exactly as it appeared in the message; the
            # sanitised on-disk name is recorded separately.
            "original_filename": filename,
            "saved_filename": os.path.basename(path),
            "content_type": _attachment_content_type(mail, filename),
            "subject": mail.subject,
            "from_header": mail.from_,
            "headers": dict(mail.headers),
            "hashes": compute_hashes(data)
        }

        # Detect sender mismatch: compare the Return-Path header with the
        # address part of the first From entry.
        env_sender = mail.headers.get("Return-Path")
        hdr_sender = mail.from_[0][1] if mail.from_ else None
        if env_sender and hdr_sender and env_sender.strip("<>") != hdr_sender:
            meta["sender_mismatch"] = {
                "from_header": hdr_sender,
                "envelope_sender": env_sender
            }

        # --- Save JSON ---
        with open(forensic_json_path, "w", encoding="utf-8") as jf:
            json.dump(meta, jf, indent=2, ensure_ascii=False)
        print(f"[+] Forensic metadata saved: {forensic_json_path}")
        os.chmod(forensic_json_path, 0o444)



def verify_p7m(p7m_file, PEC_CA_CERT):
    """Verify PEC .p7m signature using OpenSSL.

    Runs ``openssl smime -verify`` on the file and discards the extracted
    content; only the exit status is used.

    Args:
        p7m_file: Path of the signed file to verify.
        PEC_CA_CERT: Path of the CA certificate file passed as ``-CAfile``.

    Returns:
        True if OpenSSL exits with status 0, False otherwise. False is also
        returned (after printing a diagnostic) when the ``openssl`` executable
        cannot be started, so an unverifiable file is never treated as valid.
    """
    command = [
        "openssl", "smime", "-verify",
        "-in", p7m_file,
        "-CAfile", PEC_CA_CERT,
        # os.devnull rather than a literal "/dev/null", which does not exist
        # on every platform.
        "-out", os.devnull,
    ]
    try:
        result = subprocess.run(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError as exc:
        # Typically: openssl is not installed or not on PATH.
        print(f"[!] Could not run openssl to verify {p7m_file}: {exc}")
        return False
    return result.returncode == 0

def match_filters(mail):
    """Tell whether ``mail`` satisfies the subject/from CLI filters.

    Reads the module-level ``args``. Both checks are case-insensitive
    substring tests; a filter that was not given on the command line is
    skipped.

    Args:
        mail: Parsed message exposing ``subject`` and ``from_`` (a sequence
            whose items carry the email address at index 1).

    Returns:
        True when every active filter matches, False otherwise.
    """
    if args.subject:
        subject_text = (mail.subject or "").lower()
        if args.subject.lower() not in subject_text:
            return False

    if args.from_:
        needle = args.from_.lower()
        senders = [entry[1].lower() for entry in (mail.from_ or [])]
        return any(needle in sender for sender in senders)

    return True

def _attachment_bytes(attachment):
    """Return an attachment's payload as raw bytes.

    Raises:
        ValueError: If the payload cannot be base64-decoded or encoded.
    """
    payload = attachment["payload"]
    if isinstance(payload, bytes):
        return payload
    # Per mail-parser's attachment format the payload is text, and entries
    # flagged "binary" are still base64-encoded, so decode those first.
    if attachment.get("binary"):
        return base64.b64decode(payload)
    return payload.encode()


def _extract_verified_p7m(fname, p7m_bytes):
    """Verify a signed .p7m blob and return its signed content.

    Returns the extracted bytes, or None when the signature is rejected or
    the content cannot be extracted.
    """
    # Use a private temporary directory with fixed file names: the attachment
    # name is untrusted and must not be used to build paths, and the directory
    # is removed on every exit path, including errors.
    with tempfile.TemporaryDirectory() as workdir:
        p7m_path = os.path.join(workdir, "signed.p7m")
        inner_path = os.path.join(workdir, "inner.eml")
        with open(p7m_path, "wb") as f:
            f.write(p7m_bytes)

        # ---- Verify signature ----
        if not verify_p7m(p7m_path, PEC_CA_CERT):
            print(f"[!] Invalid PEC signature, skipping: {fname}")
            return None
        print(f"[+] PEC signature verified: {fname}")

        # Second run writes the signed content out so it can be parsed.
        try:
            subprocess.run([
                "openssl", "smime", "-verify",
                "-in", p7m_path,
                "-CAfile", PEC_CA_CERT,
                "-out", inner_path
            ], check=True)
        except (subprocess.CalledProcessError, OSError) as exc:
            print(f"[!] Could not extract signed content from {fname}: {exc}")
            return None

        with open(inner_path, "rb") as f:
            return f.read()


def parse_message(raw_message, uid=None, prefix="", check_filters=True):
    """Parse email, verify PEC, extract PDFs, store forensic JSON.

    PDF attachments are handed to ``save_pdf``. Attachments that look like
    PEC ``.p7m`` containers are verified with OpenSSL and, when valid, their
    signed content is parsed recursively; ``.eml`` attachments are parsed
    recursively as well. Nested messages are parsed with the CLI filters
    disabled, because the enclosing message has already been accepted.

    Args:
        raw_message: Raw message bytes.
        uid: Message UID, passed through to the forensic metadata.
        prefix: Prefix for the names of PDFs saved from this message.
        check_filters: When True, skip the message unless ``match_filters``
            accepts it.

    Returns:
        True if a PDF attachment was found in this message or in any nested
        message, False otherwise (including when the filters reject it).
    """
    # mailparser may write diagnostics to stderr while parsing; keep them out
    # of the tool's output and always restore the real stream.
    old_stderr = sys.stderr
    sys.stderr = io.StringIO()
    try:
        mail = mailparser.parse_from_bytes(raw_message)
    finally:
        sys.stderr = old_stderr

    if check_filters and not match_filters(mail):
        return False

    pdf_found = False
    for idx, attachment in enumerate(mail.attachments):
        ctype = attachment.get("mail_content_type", "")
        fname = attachment.get("filename") or f"attach_{idx}"
        lowered = fname.lower()

        # ---- Extract PDFs ----
        if ctype == "application/pdf" or lowered.endswith(".pdf"):
            # save_pdf does its own base64 decoding of string payloads.
            save_pdf(fname, attachment["payload"], mail=mail, uid=uid, prefix=prefix)
            pdf_found = True

        # ---- PEC .p7m ----
        elif lowered.endswith(".p7m") or ctype == "application/pkcs7-mime":
            try:
                p7m_bytes = _attachment_bytes(attachment)
            except ValueError as exc:
                print(f"[!] Could not decode {fname}: {exc}")
                continue
            inner_content = _extract_verified_p7m(fname, p7m_bytes)
            if inner_content is not None:
                inner_pdf_found = parse_message(
                    inner_content, uid=uid, prefix=f"{fname}_", check_filters=False
                )
                pdf_found = pdf_found or inner_pdf_found

        # ---- Nested .eml ----
        elif lowered.endswith(".eml"):
            try:
                nested_raw = _attachment_bytes(attachment)
            except ValueError as exc:
                print(f"[!] Could not decode {fname}: {exc}")
                continue
            nested_pdf_found = parse_message(
                nested_raw, uid=uid, prefix=f"{fname}_", check_filters=False
            )
            pdf_found = pdf_found or nested_pdf_found

    if not pdf_found:
        print("[*] No PDFs found.")
    return pdf_found

# -------------------- CLI --------------------
# Parsed at import time; the resulting ``args`` namespace is read by
# match_filters and by the IMAP processing section.
parser = argparse.ArgumentParser(
    description="Forensic PDF extraction from IMAP mailbox with PEC verification.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    # The sender option is spelled "--from" on the command line ("from_" is
    # only the attribute name), so the examples must use "--from".
    epilog=textwrap.dedent("""\
        Examples:
          python script.py --subject "fattura" -n 5
          python script.py --from "pec@domain.it"
          python script.py -f "pec@domain.it" -s "fattura" -n 3
    """)
)
parser.add_argument("-s", "--subject", help="Filter emails by subject (case-insensitive).")
parser.add_argument("-f", "--from", dest="from_", help="Filter emails by sender email address.")
parser.add_argument("-n", type=int, default=10, help="Number of last emails to process (default 10).")
args = parser.parse_args()

# A negative count would silently turn the "last N messages" slice into
# "everything except the first N"; reject it. Zero means "all messages".
if args.n < 0:
    parser.error("-n must be zero or a positive integer")

# -------------------- IMAP Processing --------------------
# Fail early with a readable message instead of passing None to IMAPClient.
missing_settings = [
    name
    for name, value in (("IMAP_HOST", HOST), ("IMAP_USER", USERNAME), ("IMAP_PASS", PASSWORD))
    if not value
]
if missing_settings:
    sys.exit(f"[!] Missing IMAP settings in environment/.env: {', '.join(missing_settings)}")

with IMAPClient(HOST, ssl=True) as client:
    client.login(USERNAME, PASSWORD)
    # Open the mailbox read-only so that fetching message bodies does not
    # set the \Seen flag or otherwise alter the evidence on the server.
    client.select_folder("INBOX", readonly=True)

    # Build the search from whichever filters were given; the server-side
    # search narrows the set, match_filters re-checks each message locally.
    search_criteria = []
    if args.from_:
        search_criteria += ["FROM", args.from_]
    if args.subject:
        search_criteria += ["SUBJECT", args.subject]
    if not search_criteria:
        search_criteria = ["ALL"]

    # IMAP searches default to US-ASCII; only ask for UTF-8 when a search
    # term actually needs it.
    needs_utf8 = any(ord(ch) > 127 for term in search_criteria for ch in term)
    messages = client.search(search_criteria, charset="UTF-8" if needs_utf8 else None)

    # -n 0 means "process every match"; otherwise keep only the last n.
    last_n = messages[-args.n:] if args.n else messages
    print(f"[*] Found {len(last_n)} messages to process")

    for uid in last_n:
        raw_message = client.fetch(uid, ["RFC822"])[uid][b"RFC822"]
        parse_message(raw_message, uid=uid)
