"""
EPG Bot — Main orchestrator.

Fetches the complete daily EPG at midnight (or on startup) and saves it to epg_daily.json.
Then, runs a local loop every 60 seconds to extract currently airing programs
and saves them to epg.json in real-time.

Usage:
    python epg_bot.py              # Run continuously (updates epg.json every 1 min)
    python epg_bot.py --once       # Single scrape + generate epg.json once, then exit
"""
import argparse
import json
import logging
import os
import sys
import time
from datetime import date, datetime, timedelta
from zoneinfo import ZoneInfo

# Force UTF-8 on stdout/stderr so the emoji used in log messages do not raise
# UnicodeEncodeError on consoles with a non-UTF-8 default encoding (e.g. Windows).
# The hasattr() guard skips stream objects that do not offer reconfigure().
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')
if hasattr(sys.stderr, 'reconfigure'):
    sys.stderr.reconfigure(encoding='utf-8')

import config
from scrapers import RaiScraper, MediasetScraper, SkyScraper
import urllib.request
import re

# In-process cache for the fallback asset listing managed by get_asset_map().
# ASSET_MAP_CACHE stays None until get_asset_map() has been called once.
ASSET_MAP_CACHE = None
# time.time() reference that get_asset_map() compares against to decide when
# the cache must be refreshed; 0 means nothing has been recorded yet.
ASSET_MAP_LAST_FETCH = 0

def get_asset_map() -> dict:
    """Return the cached listing of fallback channel images, refreshing it when stale.

    The listing is scraped from the ``href`` attributes of the remote assets
    index page. A successful result is reused for 10 minutes. After a failed
    attempt the previous listing (or an empty dict) is returned and the next
    network attempt is postponed by one minute, so an unreachable server costs
    at most one request timeout per minute instead of one per call.

    Returns:
        dict: lower-cased asset name without extension -> href as listed
        on the index page. Empty when no listing could ever be fetched.
    """
    global ASSET_MAP_CACHE, ASSET_MAP_LAST_FETCH

    refresh_seconds = 600  # lifetime of a successfully fetched listing
    retry_seconds = 60     # pause before retrying after a failed fetch
    current_time = time.time()

    if ASSET_MAP_CACHE is not None and (current_time - ASSET_MAP_LAST_FETCH) < refresh_seconds:
        return ASSET_MAP_CACHE

    try:
        url = "https://addictedbytheproject.nl/epg_general/assets/"
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the HTTP response even if read/decode fails.
        with urllib.request.urlopen(req, timeout=10) as response:
            html = response.read().decode('utf-8')

        asset_map = {}
        for link in re.findall(r'href="([^"]+)"', html):
            # Skip sort links ("?C=N;O=D") and absolute-path links of the index page.
            if link.startswith(('?', '/')):
                continue
            name_without_ext = link.rsplit('.', 1)[0].lower()
            asset_map[name_without_ext] = link

        ASSET_MAP_CACHE = asset_map
        ASSET_MAP_LAST_FETCH = current_time
        logger.info(f"🌐 Lista assets aggiornata: trovate {len(asset_map)} immagini fallback.")
    except Exception as e:
        # Best effort: a missing listing must never break EPG generation.
        logger.warning(f"⚠️ Impossibile recuperare lista assets: {e}")
        if ASSET_MAP_CACHE is None:
            ASSET_MAP_CACHE = {}
        # Back-date the timestamp so the cache looks fresh for retry_seconds only.
        ASSET_MAP_LAST_FETCH = current_time - refresh_seconds + retry_seconds

    return ASSET_MAP_CACHE


# ============================================================
# Logging Setup
# ============================================================
# Root logging configuration: INFO and above, timestamped, written to stdout only.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    handlers=[
        logging.StreamHandler(sys.stdout),
    ],
)
# Module-level logger used by the functions in this module.
logger = logging.getLogger(__name__)

# Time zone used for "now" and for the day boundaries of the schedule.
ROME_TZ = ZoneInfo("Europe/Rome")
# File the full-day guide is written to and reloaded from.
DAILY_EPG_FILE = "epg_daily.json"


def create_scrapers():
    """Instantiate one scraper per provider, in Rai, Mediaset, Sky order."""
    scraper_classes = (RaiScraper, MediasetScraper, SkyScraper)
    return [scraper_cls() for scraper_cls in scraper_classes]


def _make_gap_entry(channel: str, start: datetime, end: datetime, description: str) -> dict:
    """Build a 'Nessun Programma Disponibile' placeholder covering start..end."""
    return {
        "title": "Nessun Programma Disponibile",
        "image": "",
        "channel": channel,
        "start": start.isoformat(),
        "end": end.isoformat(),
        "description": description,
        "category": "Intrattenimento",
    }


def _parse_epg_time(value) -> datetime | None:
    """Parse an ISO-8601 timestamp into an aware datetime, or None if unusable."""
    if not value:
        return None
    try:
        parsed = datetime.fromisoformat(value)
    except (TypeError, ValueError):
        return None
    if parsed.utcoffset() is None:
        # Naive values cannot be compared with the aware day boundaries
        # (TypeError), so interpret them as Rome local time.
        parsed = parsed.replace(tzinfo=ROME_TZ)
    return parsed


def fill_epg_gaps(entries: list[dict], expected_channels: list[str], target_date: date) -> list[dict]:
    """Fill schedule gaps with a default 'Nessun Programma Disponibile' event.

    Args:
        entries: programme dicts; the keys read here are "channel", "start"
            and "end" (ISO-8601 strings).
        expected_channels: channel names that must appear in the result even
            when no programme was scraped for them.
        target_date: day whose schedule (Rome local time) is being completed.

    Returns:
        A new list, grouped by channel and ordered by start time within each
        channel, holding the original programme dicts (not copied, not
        modified) interleaved with placeholder entries for:
          * channels without any programme (one whole-day placeholder);
          * gaps longer than 5 minutes before a programme;
          * the tail of the day, when the last programme ends more than
            5 minutes before midnight.
        Programmes whose "start" cannot be parsed are left out.
    """
    channel_programs = {ch: [] for ch in expected_channels}

    for prog in entries:
        channel = prog.get("channel")
        if channel in channel_programs:
            channel_programs[channel].append(prog)
        elif channel:
            # Channel not announced by the scraper: keep its programmes anyway.
            channel_programs[channel] = [prog]

    start_of_day = datetime(target_date.year, target_date.month, target_date.day, 0, 0, 0, tzinfo=ROME_TZ)
    end_of_day = start_of_day + timedelta(days=1)
    tolerance = timedelta(minutes=5)  # gaps up to this long are not worth filling
    slot_description = "Nessuna programmazione disponibile in questa fascia oraria."

    filled_entries = []

    for channel, progs in channel_programs.items():
        if not progs:
            filled_entries.append(_make_gap_entry(
                channel,
                start_of_day,
                end_of_day,
                "Nessuna programmazione disponibile per l'intera giornata.",
            ))
            continue

        # Keep only programmes with a usable start time, ordered chronologically.
        timed_progs = []
        for prog in progs:
            prog_start = _parse_epg_time(prog.get("start"))
            if prog_start is None:
                logger.debug(
                    "[%s] Programma ignorato: orario di inizio non valido (%r)",
                    channel,
                    prog.get("start"),
                )
                continue
            timed_progs.append((prog_start, prog))
        timed_progs.sort(key=lambda item: item[0])

        # current_time is the instant up to which the channel is already covered.
        current_time = start_of_day

        for prog_start, prog in timed_progs:
            if prog_start > current_time + tolerance:
                filled_entries.append(_make_gap_entry(channel, current_time, prog_start, slot_description))

            filled_entries.append(prog)

            prog_end = _parse_epg_time(prog.get("end"))
            if prog_end is None:
                # Unknown end: assume the programme lasts one hour.
                prog_end = prog_start + timedelta(hours=1)
            # max() keeps coverage monotonic when a programme ends before it
            # starts or overlaps an earlier, longer one.
            current_time = max(current_time, prog_end)

        if current_time < end_of_day - tolerance:
            # Cover at least the rest of the day. The 4-hour padding is kept
            # for late gaps, where it extends the placeholder past midnight.
            gap_end = max(end_of_day, current_time + timedelta(hours=4))
            filled_entries.append(_make_gap_entry(channel, current_time, gap_end, slot_description))

    return filled_entries


def fetch_daily_epg_from_web(scrapers, target_date: date) -> dict:
    """
    Scrape every provider for ``target_date`` and assemble the daily guide.

    The result holds the date as "YYYY-MM-DD" under "date" plus one list of
    programme dicts per provider name. A provider whose scraping raises is
    logged and stored as an empty list, so one failure never aborts the rest.
    """
    divider = "=" * 50
    daily_data = dict(
        date=target_date.strftime("%Y-%m-%d"),
        rai=[],
        mediaset=[],
        sky=[],
    )
    counts = {}

    for scraper in scrapers:
        provider = scraper.get_provider_name()
        try:
            logger.info(divider)
            logger.info(f"Scraping EPG da {provider.upper()}...")
            scraped = scraper.fetch_all_channels(target_date)

            # Convert to plain dicts right away so the data is JSON-ready.
            as_dicts = [item.to_dict() for item in scraped]

            # Complete the schedule with placeholder entries where needed.
            channel_names = list(scraper.get_channels().values())
            completed = fill_epg_gaps(as_dicts, channel_names, target_date)

            daily_data[provider] = completed
            counts[provider] = len(completed)
            logger.info(f"[{provider}] Totale: {len(completed)} programmi recuperati")
        except Exception as e:
            logger.error(f"[{provider}] ERRORE CRITICO DURANTE LO SCRAPING: {e}")
            daily_data[provider] = []
            counts[provider] = 0

    # Per-provider recap followed by the grand total.
    logger.info(divider)
    logger.info("📊 Riepilogo Scraping Giornaliero:")
    for provider, count in counts.items():
        logger.info(f"  {provider.upper()}: {count} programmi")
    logger.info(f"  TOTALE: {sum(counts.values())} programmi")

    return daily_data


def save_daily_epg(data: dict, output_file: str):
    """Write the full-day guide to ``output_file`` as indented UTF-8 JSON."""
    with open(output_file, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
    logger.info(f"💾 Guida giornaliera salvata in: {output_file}")


def load_daily_epg(input_file: str) -> dict | None:
    """Load daily EPG database from JSON file if it exists."""
    if not os.path.exists(input_file):
        return None
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        logger.warning(f"⚠️  Impossibile caricare {input_file}: {e}")
        return None


# Channel slug -> asset name, for logos whose file name differs from the slug.
_ASSET_ALIASES = {
    "italia_1": "italia_uno",
    "rai_sport_plus": "raisport",
    "rai_news_24": "rai_news",
    "20": "canale_20",
    "boing": "mediaset_boing",
    "tgcom24": "mediaset_tgcom24",
    "cine34": "cine_34",
    "twenty_seven": "mediaset_27",
    "sky_cinema_suspense": "sky_cinema_suspance",
    "sky_sport_formula1": "sky_sport_f1",
    "history": "sky_history",
    "mtv": "sky_mtv",
    "nickjr": "nick_jr",
}


def _fallback_image_url(channel_name):
    """Return the fallback asset URL for a channel, or "" when none is listed."""
    slug = channel_name.lower().replace(" ", "_").replace("+", "plus")

    # Slugs containing "sky_sport_25" and the TV8 variants share one asset each.
    if "sky_sport_25" in slug:
        slug = "sky_sport"
    elif slug in ("sky_tv8", "sky_tv8_hd"):
        slug = "tv8"
    slug = _ASSET_ALIASES.get(slug, slug)

    asset_map = get_asset_map()

    # Try the slug as-is first, then with the underscores removed.
    for candidate in (slug, slug.replace("_", "")):
        if candidate in asset_map:
            return f"https://addictedbytheproject.nl/epg_general/assets/{asset_map[candidate]}"
    return ""


def get_current_programs(daily_data: dict, current_time: datetime) -> list[dict]:
    """
    Pick, for every channel in the daily guide, the programme airing at ``current_time``.

    Programmes are copied before being touched, so ``daily_data`` is never
    modified. A missing "end" is taken from the next programme's start, or is
    start + 2 hours for the last programme of a channel. A programme is on
    air when start <= current_time < end. The selected programme gets a
    fallback channel image when its own image is empty or is a Sky URL
    lacking a checksum. Channels with no airing programme are omitted. The
    result is sorted by channel name.
    """
    by_channel = {}

    # Group private copies of the programmes by channel name.
    for provider in ("rai", "mediaset", "sky"):
        for entry in daily_data.get(provider, []):
            name = entry.get("channel")
            if name:
                by_channel.setdefault(name, []).append(entry.copy())

    on_air = []

    for schedule in by_channel.values():
        schedule.sort(key=lambda item: item.get("start", ""))

        # Derive the end times that the source did not provide.
        last_index = len(schedule) - 1
        for index, item in enumerate(schedule):
            if item.get("end"):
                continue
            if index < last_index:
                item["end"] = schedule[index + 1].get("start", "")
                continue
            # Last programme of the channel: assume it lasts two hours.
            try:
                item["end"] = (datetime.fromisoformat(item["start"]) + timedelta(hours=2)).isoformat()
            except Exception:
                pass

        # The first programme whose [start, end) contains current_time wins.
        for item in schedule:
            starts = item.get("start", "")
            ends = item.get("end", "")
            if not (starts and ends):
                continue

            try:
                begin_dt = datetime.fromisoformat(starts)
                finish_dt = datetime.fromisoformat(ends)
                if not (begin_dt <= current_time < finish_dt):
                    continue

                # Sky image URLs without a checksum answer 403, so treat them as missing.
                artwork = item.get("image", "")
                if artwork and "guidatv.sky.it" in artwork:
                    lowered = artwork.lower()
                    if "checksum" not in lowered and "md5" not in lowered:
                        artwork = ""

                if not artwork:
                    item["image"] = _fallback_image_url(item.get("channel", ""))

                on_air.append(item)
                break
            except Exception:
                # Unparseable or incomparable data: try the next programme.
                continue

    on_air.sort(key=lambda item: item.get("channel", ""))
    return on_air


def save_current_epg(data: list[dict], output_file: str):
    """Write the list of currently airing programmes to ``output_file`` as indented UTF-8 JSON."""
    with open(output_file, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
    active_channels = len(data)
    logger.info(f"💾 EPG in onda aggiornato in: {output_file} ({active_channels} canali attivi)")


def run_once(scrapers):
    """Scrape today's guide once, write the daily and the on-air files, then return."""
    started_at = datetime.now(ROME_TZ)
    target_day = started_at.date()

    logger.info(f"🚀 Avvio raccolta EPG singola — {started_at.strftime('%d/%m/%Y %H:%M:%S')}")
    logger.info(f"📅 Data target: {target_day.strftime('%d/%m/%Y')}")

    # Full-day guide first; the on-air snapshot is derived from it.
    guide = fetch_daily_epg_from_web(scrapers, target_day)
    save_daily_epg(guide, DAILY_EPG_FILE)

    save_current_epg(get_current_programs(guide, started_at), config.OUTPUT_FILE)

    logger.info("✅ Ciclo singolo completato con successo!")


def run_continuous(scrapers):
    """
    Keep the on-air file up to date until interrupted.

    Each iteration makes sure a daily guide for today's date (Rome time) is
    in memory: first from the local daily file, otherwise through a network
    scrape that is then saved. It then rewrites the on-air file and sleeps
    60 seconds. Errors inside an iteration are logged and the loop goes on;
    Ctrl+C stops it.
    """
    logger.info("🔄 Modalità continua — Aggiornamento real-time ogni 60 secondi")

    guide = None

    while True:
        try:
            tick = datetime.now(ROME_TZ)
            day_key = tick.strftime("%Y-%m-%d")

            # Reload only when nothing is cached or the cached guide is for another day.
            if guide is None or guide.get("date") != day_key:
                logger.info(f"📅 Verifica EPG giornaliero per la data: {day_key}...")

                # The local file is tried first to avoid needless network requests.
                guide = load_daily_epg(DAILY_EPG_FILE)

                if guide is not None and guide.get("date") == day_key:
                    logger.info("📂 EPG giornaliero valido caricato con successo da file locale! (0 chiamate API)")
                else:
                    logger.info("📂 File locale assente o scaduto. Avvio scraping di rete...")
                    guide = fetch_daily_epg_from_web(scrapers, tick.date())
                    save_daily_epg(guide, DAILY_EPG_FILE)

            # Refresh the on-air snapshot from the in-memory guide.
            save_current_epg(get_current_programs(guide, tick), config.OUTPUT_FILE)

        except KeyboardInterrupt:
            logger.info("⏹️  Bot arrestato dall'utente.")
            break
        except Exception as e:
            logger.error(f"❌ Errore nel ciclo principale: {e}", exc_info=True)

        # Pause before the next refresh; Ctrl+C during the pause also stops the bot.
        logger.info("⏳ In attesa del prossimo minuto per aggiornare i programmi in onda...")
        try:
            time.sleep(60)
        except KeyboardInterrupt:
            logger.info("⏹️  Bot arrestato dall'utente.")
            break


def main():
    """Parse the command line, print the banner and start the selected run mode."""
    cli = argparse.ArgumentParser(
        description="EPG Bot Italia — Rai, Mediaset, Sky (Real-time & Daily modes)"
    )
    cli.add_argument(
        "--once",
        action="store_true",
        help="Esegui una singola raccolta completa e termina",
    )
    cli.add_argument(
        "--output",
        type=str,
        default=None,
        help=f"File di output in tempo reale (default: {config.OUTPUT_FILE})",
    )
    options = cli.parse_args()

    # An explicit --output replaces the configured on-air file path.
    if options.output:
        config.OUTPUT_FILE = options.output

    rule = "=" * 60
    banner = (
        rule,
        "  📺 EPG Bot Italia — Real-time",
        "  Canali: Rai • Mediaset • Sky",
        rule,
    )
    for banner_line in banner:
        logger.info(banner_line)

    scrapers = create_scrapers()

    runner = run_once if options.once else run_continuous
    runner(scrapers)


# Start the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
