#!/bin/bash
# Forensic wrapper for peepdf generating human-readable TXT report
# Strict mode: stop on any failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# PDF_FOLDER is where input PDFs are looked up; FORENSIC_FOLDER receives the
# working copy, the peepdf log and the final report.
PDF_FOLDER="/app/pdfs/files"
FORENSIC_FOLDER="/app/pdfs/forensic_copy"
mkdir -p "$FORENSIC_FOLDER"

# --- Argument check ---
# At least one argument (-h or a file name) is required.
if (( $# < 1 )); then
    echo "[!] Usage: peepdf -h | peepdf <filename.pdf>"
    exit 1
fi

# The first positional argument is either the help flag or the PDF file name.
ARG=$1
shift

# Allow only help. This is checked before the extra-argument test below, so
# "-h" followed by other arguments still prints peepdf's help text.
case "$ARG" in
    -h)
        cd /app/peepdf-3/peepdf || exit 1
        python3 peepdf.py -h
        exit 0
        ;;
esac

# Reject additional flags: nothing may follow the file name.
if (( $# > 0 )); then
    echo "[!] Only 'peepdf -h' or 'peepdf <filename.pdf>' are allowed"
    exit 1
fi

# Accept a bare file name only. The argument is joined onto $PDF_FOLDER to
# find the input and onto $FORENSIC_FOLDER to build the copy, log and report
# paths, so a "/" (e.g. "../../etc/passwd") would let the caller read from and
# write to locations outside those two folders.
if [[ "$ARG" == */* ]]; then
    echo "[!] Invalid file name (path separators are not allowed): $ARG"
    exit 1
fi

# Ensure original file exists (must be a regular file inside $PDF_FOLDER)
ORIGINAL="$PDF_FOLDER/$ARG"
if [[ ! -f "$ORIGINAL" ]]; then
    echo "[!] File not found: $ORIGINAL"
    exit 1
fi

# --- Create forensic copy ---
# peepdf is later run against this copy rather than against $ORIGINAL.
COPY="$FORENSIC_FOLDER/$ARG"
cp -- "$ORIGINAL" "$COPY"

# --- Compute pre-analysis hashes ---
# The checksum tools print "<digest>  <path>"; keep only the digest by
# dropping everything from the first space onwards.
SHA256_BEFORE=$(sha256sum "$COPY")
SHA256_BEFORE=${SHA256_BEFORE%% *}
MD5_BEFORE=$(md5sum "$COPY")
MD5_BEFORE=${MD5_BEFORE%% *}

# --- Run peepdf and capture output ---
# script(1) records the interactive peepdf session into $OUTPUT_LOG.
OUTPUT_LOG="$FORENSIC_FOLDER/${ARG}_peepdf.log"
cd /app/peepdf-3/peepdf || exit 1

# script hands the -c string to a shell, so the path has to be quoted for that
# shell. Wrapping it in '...' alone is not enough: a single quote inside the
# file name would close the quoting and the rest of the name would be executed
# as shell code. Rewrite every ' as '\'' (close quote, escaped quote, reopen
# quote) first. Names without a quote yield exactly the same command as before.
escaped_copy=${COPY//\'/\'\\\'\'}
script -q -c "python3 /app/peepdf-3/peepdf/peepdf.py -i '$escaped_copy'" "$OUTPUT_LOG"

# --- Compute post-analysis hashes ---
# Same digests as before the peepdf run, taken again from the copy so the two
# sets can be compared in the report. Only the first space-separated field
# (the digest) of each tool's output is kept.
SHA256_AFTER=$(sha256sum "$COPY" | cut -d ' ' -f 1)
MD5_AFTER=$(md5sum "$COPY" | cut -d ' ' -f 1)

# --- Prepare fetch_email section ---
# A "<name>.json" file is looked for next to the forensic copy. When present,
# its content and digests go into the report; otherwise fixed placeholder
# text is used for both sections.
FETCH_JSON="$FORENSIC_FOLDER/${ARG}.json"
if [[ ! -f "$FETCH_JSON" ]]; then
    FETCH_SECTION="=== FETCH_EMAIL METADATA ==="$'\n'"No fetch_email metadata available."
    JSON_HASH_SECTION="JSON SHA256 : N/A"$'\n'"JSON MD5    : N/A"
else
    fetch_json_body=$(cat "$FETCH_JSON")
    FETCH_SECTION="=== FETCH_EMAIL METADATA ==="$'\n'"$fetch_json_body"
    # Compute JSON hashes
    JSON_SHA256=$(sha256sum "$FETCH_JSON" | awk '{print $1}')
    JSON_MD5=$(md5sum "$FETCH_JSON" | awk '{print $1}')
    JSON_HASH_SECTION="JSON SHA256 : $JSON_SHA256"$'\n'"JSON MD5    : $JSON_MD5"
fi

# --- Prepare peepdf section (cleaned for report) ---
# Remove ESC[ ... sequences that end in J, K, m, s or u from the captured log
# before it is embedded in the report.
cleaned_log=$(sed -r 's/\x1B\[[0-9;]*[JKmsu]//g' "$OUTPUT_LOG")
PEEPDF_SECTION="=== PEEPDF OUTPUT ==="$'\n'"$cleaned_log"

# --- Prepare forensic hashes section ---
# The copy is reported as matching only if both the SHA256 and the MD5 digest
# are the same before and after the peepdf run.
if [[ "$SHA256_BEFORE" != "$SHA256_AFTER" || "$MD5_BEFORE" != "$MD5_AFTER" ]]; then
    hash_match="False"
else
    hash_match="True"
fi
HASH_SECTION="=== FORENSIC HASHES ===
Original file: $ORIGINAL
Forensic copy: $COPY
SHA256 before: $SHA256_BEFORE
MD5 before  : $MD5_BEFORE
SHA256 after : $SHA256_AFTER
MD5 after   : $MD5_AFTER
Hash match  : $hash_match"

# --- Combine all sections into TXT report ---
# The four sections are written in a fixed order, each followed by a newline
# and separated from the next by one blank line.
REPORT_TXT="$FORENSIC_FOLDER/${ARG}_report.txt"
printf '%s\n\n%s\n\n%s\n\n%s\n' \
    "$FETCH_SECTION" \
    "$JSON_HASH_SECTION" \
    "$PEEPDF_SECTION" \
    "$HASH_SECTION" > "$REPORT_TXT"

# --- Compute hashes of the report itself ---
# Digest the finished report and print its path and both digests on stdout.
# Each tool prints "<digest>  <path>"; everything from the first space onwards
# is discarded.
REPORT_SHA256=$(sha256sum "$REPORT_TXT")
REPORT_SHA256=${REPORT_SHA256%% *}
REPORT_MD5=$(md5sum "$REPORT_TXT")
REPORT_MD5=${REPORT_MD5%% *}

printf '%s\n' \
    "[+] Forensic report saved: $REPORT_TXT" \
    "    SHA256: $REPORT_SHA256" \
    "    MD5   : $REPORT_MD5"
