import subprocess
import os
import re
import json
import requests
import datetime
import difflib
from PIL import Image
import numpy as np
import easyocr

def capture_frame(stream_url, output_file="current_frame.jpg"):
    """Grab a single frame from an m3u8 stream via ffmpeg.

    Parameters
    ----------
    stream_url : str
        URL of the HLS (m3u8) stream to sample.
    output_file : str, optional
        Path where the JPEG frame is written (default "current_frame.jpg").

    Returns
    -------
    bool
        True when ffmpeg wrote the frame; False on any failure
        (ffmpeg missing, stream unreachable, encode error, timeout).
    """
    print(f"[*] Cattura frame da: {stream_url}...")
    cmd = ["ffmpeg", "-y", "-i", stream_url, "-frames:v", "1", "-q:v", "2", output_file]
    try:
        # check=True raises CalledProcessError on a non-zero exit status;
        # OSError covers a missing ffmpeg binary; the timeout keeps a
        # stalled stream from blocking the polling loop indefinitely.
        subprocess.run(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
            timeout=60,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        return False
    return True

def parse_with_easyocr(image_path):
    """OCR a showcase frame and extract one event per "EVENTS" row.

    Parameters
    ----------
    image_path : str
        Path to the JPEG frame to analyze.

    Returns
    -------
    list[dict]
        One dict per detected channel row, with keys 'channel' (channel
        number as a string, or "X" when no number was read), 'time'
        ("HH:MM" or "" when no time was found) and 'title'.
    """
    # Building an easyocr.Reader loads the recognition models and is
    # expensive, so cache it on the function and reuse it across calls.
    # gpu=False for safety; EasyOCR will still work without a GPU.
    if not hasattr(parse_with_easyocr, "_reader"):
        parse_with_easyocr._reader = easyocr.Reader(['it', 'en'], gpu=False)
    reader = parse_with_easyocr._reader

    results = reader.readtext(image_path)

    # Group detections into visual rows keyed by vertical center,
    # with a 35px tolerance.
    rows = {}
    for (bbox, text, conf) in results:
        if conf < 0.2:
            continue

        y_center = sum(p[1] for p in bbox) / 4
        y_top = min(p[1] for p in bbox)
        x_left = min(p[0] for p in bbox)

        row_key = next((k for k in rows if abs(k - y_center) < 35), None)
        if row_key is None:
            row_key = y_center
            rows[row_key] = []
        rows[row_key].append({'text': text, 'x': x_left, 'y_top': y_top})

    final_output = []
    for y in sorted(rows):
        # Sort by top Y first, to separate title (above) from subtitle
        # (below), then left-to-right.
        row_data = sorted(rows[y], key=lambda w: (w['y_top'], w['x']))
        full_text = " ".join(w['text'] for w in row_data)

        # Channel number from the "EVENTS <n>" label; "X" when the
        # label carries no number.
        channel_id = "X"
        if "EVENTS" in full_text.upper():
            match = re.search(r'EVENTS\s*(\d+)', full_text.upper())
            if match:
                channel_id = match.group(1)

        # Skip rows that do not contain an EVENTS label at all.
        if channel_id == "X" and "EVENTS" not in full_text.upper():
            continue

        # Split words into title (upper band) / subtitle (lower band)
        # and pull out the start time.
        top_words = []
        bottom_words = []
        row_time = ""

        # The mean top-Y of the row separates the title line from the
        # subtitle line.
        y_avg = sum(w['y_top'] for w in row_data) / len(row_data)

        for w in row_data:
            t = w['text']
            # Start time, e.g. "01:00" or "22:00".
            if re.match(r'^\d{1,2}:\d{2}$', t):
                row_time = t
                continue

            # Drop the channel label and stray short digit fragments.
            if "EVENTS" in t.upper():
                continue
            if t.isdigit() and len(t) < 3:
                continue

            # Words clearly below the row average belong to the subtitle.
            if w['y_top'] > y_avg + 5:  # 5px tolerance
                bottom_words.append(t)
            else:
                top_words.append(t)

        if top_words:
            title = " ".join(top_words)
            subtitle = " ".join(bottom_words)

            if not subtitle:
                # No clean top/bottom separation: split the title at the
                # first known subtitle keyword instead.
                split_point = len(top_words)
                for idx, word in enumerate(top_words):
                    if word.upper() in ["NCAA", "PGA", "LIVE", "ROUND"]:
                        split_point = idx
                        break
                title = " ".join(top_words[:split_point])
                subtitle = " ".join(top_words[split_point:])
            # NOTE(review): subtitle is computed but not emitted below —
            # kept in case a future consumer wants it.

            final_output.append({
                'channel': channel_id,
                'time': row_time,
                'title': title
            })

    return final_output

# NOTE: json is already imported at the top of the file; the duplicate
# import that used to sit here has been removed.
import time

def fetch_epg(date_str):
    """Download the DAZN EPG JSON for the given date.

    Parameters
    ----------
    date_str : str
        ISO date (YYYY-MM-DD), used as both start and end of the range.

    Returns
    -------
    list
        The 'Tiles' list from the EPG payload, or [] on any error
        (network failure, HTTP error status, malformed payload).
    """
    url = f"https://epg.discovery.indazn.com/eu/v5/epgWithDatesRange?country=it&languageCode=it&openBrowse=true&timeZoneOffset=120&startDate={date_str}&endDate={date_str}&brand=dazn"
    print(f"[*] Aggiornamento EPG per il giorno: {date_str}...")
    try:
        r = requests.get(url, timeout=10)
        # Fail early on HTTP error pages so the log shows the status
        # code instead of a confusing JSON decode error.
        r.raise_for_status()
        return r.json().get('Tiles', [])
    except Exception as e:
        # Best-effort: the caller treats an empty list as "no EPG".
        print(f"[-] Errore recupero EPG: {e}")
        return []

def get_image_id(tile):
    """Return the 'image-header' image Id for an EPG tile, or None.

    Checks the tile's own Image entry first, then scans the image lists
    nested under its Sport and Competition sections.
    """
    primary = tile.get('Image', {})
    if isinstance(primary, dict) and primary.get('ImageType') == 'image-header':
        return primary.get('Id')

    # Lazily walk the nested image lists, keeping only header images.
    header_images = (
        entry
        for section in ('Sport', 'Competition')
        for entry in tile.get(section, {}).get('Images', [])
        if entry.get('ImageType') == 'image-header'
    )
    hit = next(header_images, None)
    return hit.get('Id') if hit is not None else None

def match_epg_event(tiles, ocr_title, ocr_time, tz_offset_minutes=120):
    """Pick the EPG tile that best matches an OCR-detected event.

    A tile is a candidate when its local start time is within 15 minutes
    of the OCR time; among candidates, the one whose title is most
    similar to the OCR title wins, provided the similarity exceeds 0.5.

    Parameters
    ----------
    tiles : list[dict]
        EPG tiles carrying 'Title' and an ISO-8601 UTC 'EventStartTime'.
    ocr_title : str
        Event title read from the frame.
    ocr_time : str
        Start time read from the frame ("HH:MM"); empty disables matching.
    tz_offset_minutes : int, optional
        Minutes added to the UTC start time to obtain local time
        (default 120; NOTE: this does not track DST automatically).

    Returns
    -------
    dict | None
        The best-matching tile, or None when nothing scores above 0.5.
    """
    if not ocr_time:
        return None

    # Parse the OCR time once, up front; a malformed value means no match.
    try:
        ocr_h, ocr_m = map(int, ocr_time.split(':'))
    except ValueError:
        return None
    ocr_minutes = ocr_h * 60 + ocr_m

    best_match = None
    best_score = 0.0

    for tile in tiles:
        epg_title = tile.get('Title', '')
        epg_start = tile.get('EventStartTime', '')  # ISO 8601 (UTC)
        if not epg_start:
            continue

        try:
            dt = datetime.datetime.fromisoformat(epg_start.replace('Z', '+00:00'))
        except ValueError:
            # Malformed timestamp: skip this tile rather than abort.
            continue

        local_dt = dt + datetime.timedelta(minutes=tz_offset_minutes)
        epg_minutes = local_dt.hour * 60 + local_dt.minute

        # Only consider tiles starting within 15 minutes of the OCR time.
        # NOTE: the difference does not wrap across midnight.
        if abs(ocr_minutes - epg_minutes) > 15:
            continue

        score = difflib.SequenceMatcher(None, ocr_title.upper(), epg_title.upper()).ratio()
        if score > best_score:
            best_score = score
            best_match = tile

    return best_match if best_score > 0.5 else None

def main():
    """Continuously scan the DAZN showcase stream and write showcase.json.

    Every 60 seconds: refresh the EPG (at most once per day), grab a
    frame from the stream (falling back to preview.jpg when the live
    capture fails), OCR it, match the detected events against the EPG
    and dump the result to showcase.json.
    """
    url = "https://easydrm.net:2083/showcase/cgujgrtm8y9z/dazn/it/playlist.m3u8"
    capture_file = "current_frame.jpg"

    last_epg_date = None
    epg_tiles = []

    print("[*] Bot avviato in modalità continua (check ogni 60s).")

    while True:
        try:
            now = datetime.datetime.now()
            today = now.date().isoformat()

            # Refresh the EPG only on day change or when it is empty.
            if today != last_epg_date or not epg_tiles:
                epg_tiles = fetch_epg(today)
                last_epg_date = today

            print(f"\n[{now.strftime('%H:%M:%S')}] Avvio check vetrina...")

            # Remove the previous frame so a failed capture cannot be
            # mistaken for a fresh one.
            if os.path.exists(capture_file):
                try:
                    os.remove(capture_file)
                except OSError:
                    pass

            # BUG FIX: use a per-iteration variable for the fallback.
            # Previously a single failed capture reassigned the loop's
            # file name to "preview.jpg" permanently, so later
            # iterations deleted and overwrote preview.jpg itself.
            frame_file = capture_file
            if not capture_frame(url, capture_file):
                print("[-] Impossibile ottenere il frame live.")
                if os.path.exists("preview.jpg"):
                    frame_file = "preview.jpg"
                else:
                    time.sleep(60)
                    continue

            events = parse_with_easyocr(frame_file)

            output_json = {}
            for ev in events:
                match = match_epg_event(epg_tiles, ev['title'], ev['time'])
                img_id = get_image_id(match) if match else None

                # Rebuild the CDN image URL from the image id.
                img_url = "NON TROVATO"
                if img_id:
                    img_url = f"https://image.discovery.indazn.com/eu/v3/eu/none/{img_id}/fill/none/top/none/100/1920/1080/webp/image"

                channel_key = f"DaznEvents{ev['channel']}"
                output_json[channel_key] = {
                    "Evento": ev['title'],
                    "Orario": ev['time'],
                    "UrlFoto": img_url
                }

            # Persist the scan result.
            with open("showcase.json", "w", encoding="utf-8") as f:
                json.dump(output_json, f, indent=4, ensure_ascii=False)

            print(f"[+] Scan completato. {len(output_json)} eventi trovati.")

        except Exception as e:
            # Top-level boundary: log and keep the polling loop alive.
            print(f"[-] Errore nel ciclo: {e}")

        time.sleep(60)

# Script entry point: start the continuous monitoring loop.
if __name__ == "__main__":
    main()
