import os
import time
import psycopg2
import logging
from celery import Celery

# Logging: INFO and above, each record prefixed with timestamp, logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# PostgreSQL settings: each value is read from the environment, falling back to
# the literal shown when the variable is unset.
# NOTE(review): the fallback user/password look like development values -
# confirm that real deployments always set DB_USER / DB_PASSWORD.
DB_HOST = os.environ.get('DB_HOST', 'db')
DB_PORT = os.environ.get('DB_PORT', '5432')
DB_USER = os.environ.get('DB_USER', 'wuyuxuan')
DB_PASS = os.environ.get('DB_PASSWORD', '1234567890')
DB_NAME = os.environ.get('DB_NAME', 'postgres')

# Celery broker: Redis database 0 on REDIS_HOST:REDIS_PORT.
REDIS_HOST = os.environ.get('REDIS_HOST', 'redis')
REDIS_PORT = os.environ.get('REDIS_PORT', '6379')
BROKER_URL = "redis://{}:{}/0".format(REDIS_HOST, REDIS_PORT)

# Celery application handle, used by this module to send tasks to the broker.
app = Celery('cron_scheduler', broker=BROKER_URL)

def get_db_connection():
    """Open a new psycopg2 connection from the module-level DB_* settings.

    Returns:
        The connection object on success, or ``None`` when connecting raised
        any ``Exception``; in that case the error is logged, not propagated.
    """
    connect_kwargs = {
        'host': DB_HOST,
        'port': DB_PORT,
        'user': DB_USER,
        'password': DB_PASS,
        'dbname': DB_NAME,
    }
    try:
        return psycopg2.connect(**connect_kwargs)
    except Exception as exc:
        # Callers test the return value for falsiness instead of catching.
        logger.error(f"Error connecting to database: {exc}")
        return None

def check_and_queue_tasks():
    """Queue a ``generate_content`` Celery task for each pending publication.

    Selects publications that have a ``full_paper_url`` and whose
    ``task_status`` is ``'pending'`` or NULL. For each selected row:

    * if none of its resource URL columns is NULL, the row is marked
      ``'completed'``;
    * otherwise the row is marked ``'processing'`` and a task is sent that
      lists the resource types whose URL column is NULL.

    If sending a task fails, that row is set back to ``'pending'`` so a later
    run selects it again, and the remainder of this run is abandoned.

    Returns:
        None. Returns immediately when no database connection could be
        opened. Exceptions raised while processing are logged (with
        traceback), a rollback is attempted, and the connection is closed.
    """
    conn = get_db_connection()
    if conn is None:
        return

    # Task resource-type keys, listed in the same order as the *_url columns
    # that follow full_paper_url in the SELECT below.
    resource_types = (
        'audio', 'video', 'mental_map', 'report', 'flashcard',
        'quiz', 'infographic', 'presentation', 'datatable',
    )

    # Condition: file exists AND (status is pending OR status is null).
    query = """
            SELECT id, full_paper_url, 
                   audio_url, video_url, mental_map_url, report_url, flashcard_url, 
                   quiz_url, infografica_url, presentation_url, datatable_url
            FROM publication_schema.publications
            WHERE full_paper_url IS NOT NULL 
              AND (task_status = 'pending' OR task_status IS NULL)
        """

    try:
        # The context manager closes the cursor even if processing fails.
        with conn.cursor() as cur:
            cur.execute(query)
            rows = cur.fetchall()

            logger.info(f"Found {len(rows)} pending publications.")

            for row in rows:
                pub_id = str(row[0])
                full_paper_filename = row[1]

                # A resource is still needed when its URL column is NULL.
                needed_types = [
                    type_key
                    for type_key, url in zip(resource_types, row[2:])
                    if url is None
                ]

                if not needed_types:
                    # Nothing left to generate: mark completed.
                    cur.execute("UPDATE publication_schema.publications SET task_status = 'completed' WHERE id = %s", (pub_id,))
                    conn.commit()
                    logger.info(f"Publication {pub_id} has all resources. Marked as completed.")
                    continue

                # Note: The worker mounts ./assets:/app/outputs. The file is
                # relative to the assets root, so the path handed to the task
                # is built under /app/outputs.
                pdf_path = os.path.join('/app/outputs', full_paper_filename)

                task_kwargs = {
                    'pdf_path': pdf_path,
                    'resource_types': needed_types,
                    'output_dir': '/app/outputs',
                    'publication_id': pub_id,  # Pass ID for DB update
                }

                # Update status to processing BEFORE pushing to avoid race conditions.
                cur.execute("UPDATE publication_schema.publications SET task_status = 'processing' WHERE id = %s", (pub_id,))
                conn.commit()

                try:
                    # Task name 'generate_content' from celery_integration.py
                    app.send_task('generate_content', kwargs=task_kwargs)
                except Exception:
                    # The 'processing' status is already committed. Without
                    # this revert the row would never match the SELECT again
                    # and the publication would be stuck.
                    logger.exception(f"Failed to queue task for Publication {pub_id}. Reverting status to pending.")
                    cur.execute("UPDATE publication_schema.publications SET task_status = 'pending' WHERE id = %s", (pub_id,))
                    conn.commit()
                    # Abort the run; remaining rows are untouched and will be
                    # selected again next time.
                    raise

                logger.info(f"Queued task for Publication {pub_id}. Types: {needed_types}")

    except Exception as e:
        logger.exception(f"Error in check_and_queue_tasks: {e}")
        try:
            conn.rollback()
        except psycopg2.Error:
            # Rolling back can itself fail (e.g. on a connection that is
            # already closed); log it rather than let it escape to the caller.
            logger.exception("Rollback failed in check_and_queue_tasks.")
    finally:
        conn.close()

if __name__ == "__main__":
    # Script entry point: run one scheduling pass right away, then repeat
    # forever with a one-day pause after each pass.
    logger.info("Starting Cron Scheduler...")
    one_day_seconds = 24 * 60 * 60
    while True:
        check_and_queue_tasks()
        logger.info("Sleeping for 24 hours...")
        time.sleep(one_day_seconds)
