o
    *
jC                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ edZeejdrGejjd	d
 eejdrTejjd	d
 eejZed Zed Zejejdejed	d
eejgd eeZ dZ!de"de"fddZ#de"de"fddZ$de"de"de%fddZ&de'de(e' de)e'dB e%f fddZ*de'dB fdd Z+de'dB fd!d"Z,d#e'de"fd$d%Z-de'dB fd&d'Z.de(e' dB fd(d)Z/d*d+ Z0ed,kre0  dS dS )-a  
DAZN EPG Merger
Combina i dati dell'EPG ufficiale DAZN con i canali estratti via OCR dal flusso live.
Usa fuzzy matching per associare i titoli OCR (spesso incompleti/sbagliati)
con quelli originali DAZN.

Output: EPG finale con Nome, Immagine, Canale, Ora inizio/fine.
    N)datetimetimezone	timedelta)ZoneInfo)SequenceMatcher)PathzEurope/Romereconfigureutf-8encodingepg_datazdazn_merger.logz'%(asctime)s [%(levelname)s] %(message)s)levelformathandlers?utc_strreturnc              	   C   sJ   | sdS zt | dd}|t}| W S  ttfy$   |  Y S w )z
    Converte un orario UTC (es. '2026-05-16T15:00:00Z') in ora italiana.
    Gestisce automaticamente CET (UTC+1) e CEST (UTC+2) grazie a zoneinfo.
    Restituisce formato ISO con offset, es. '2026-05-16T17:00:00+02:00'.
     Z+00:00)r   fromisoformatreplace
astimezoneIT_TZ	isoformat
ValueError	TypeError)r   dt_utcdt_it r   7/var/www/addictedbytheproject.nl/epg/dazn_epg_merger.pyutc_to_italian2   s   

r!   textc                 C   sZ   t d| } ddd | D } |  } tdd| } tdd| } tdd|  } | S )	u   
    Normalizza il testo per il confronto fuzzy:
    - Rimuove accenti (Nürburgring -> Nurburgring)
    - Lowercase
    - Rimuove punteggiatura e caratteri speciali
    - Rimuove spazi multipli
    NFKDr   c                 s   s    | ]
}t |s|V  qd S )N)unicodedata	combining).0cr   r   r    	<genexpr>N   s    z!normalize_text.<locals>.<genexpr>z[|,.\-:;!?'\"()\[\]{}] z\.{2,}z\s+)r$   	normalizejoinlowerresubstrip)r"   r   r   r    normalize_textD   s   	r0   	ocr_title	epg_titlec                 C   s  t | }t |}|r|sdS td|| }t|t|krJd}t|}tt|| d D ]}||||  }td|| }	|	|krF|	}q.|}
n|}
t| }t| }dd |D }dd |D }|rst||@ t| }nd}|d |
d  |d	  }|S )
z
    Calcola la similarita' tra un titolo OCR e uno dell'EPG.
    Usa una combinazione di:
    1. SequenceMatcher ratio (match globale)
    2. Partial matching (il titolo OCR e' spesso troncato)
    3. Bonus per parole chiave in comune
            N   c                 S      h | ]
}t |d kr|qS    lenr&   wr   r   r    	<setcomp>       z%compute_similarity.<locals>.<setcomp>c                 S   r5   r6   r8   r:   r   r   r    r<      r=   g      ?r   g333333?)r0   r   ratior9   rangesetsplit)r1   r2   norm_ocrnorm_epgglobal_ratiobest_partialocr_leniwindowr>   partial_ratio	ocr_words	epg_wordsword_overlapscorer   r   r    compute_similarity_   s2   rN   	ocr_event	epg_tilesc                 C   sv   |  dd}|rt|dk rdS d}d}|D ]}| dd}|s!qt||}||kr.|}|}q|tkr7||fS d|fS )z
    Trova il miglior tile EPG che corrisponde all'evento OCR.
    Restituisce (tile, score) o (None, 0.0) se nessun match valido.
    titler      )Nr3   Nr3   Title)getr9   rN   MATCH_THRESHOLD)rO   rP   r1   	best_tile
best_scoretiler2   rM   r   r   r    find_best_match   s"   
rY   c                  C   s   t  d} td|  d }| s(ttd}|r!|d }ntd dS t	d|j
  t|d	d
d}t|W  d   S 1 sHw   Y  dS )zCarica l'EPG DAZN piu' recente.z%Y-%m-%d	dazn_epg_z.jsonzdazn_epg_????-??-??.jsonz&[MERGER] Nessun file EPG DAZN trovato!Nz[EPG] Caricato: rr	   r
   )r   nowstrftimeEPG_DIRexistssortedglobloggererrorinfonameopenjsonload)todayepg_pathfilesfr   r   r    load_dazn_epg   s   

$rn   c                  C   sZ   t d } |  std dS t| ddd}t|W  d   S 1 s&w   Y  dS )z(Carica gli eventi live estratti via OCR.zdazn_live_events.jsonz0[MERGER] File dazn_live_events.json non trovato!Nr\   r	   r
   )r_   r`   rc   rd   rg   rh   ri   )	live_pathrm   r   r   r    load_live_events   s   
$rp   rX   c                 C   s   dD ]}|  |}|rt|tr| dr|d   S q|  di }|rAt|trA| dg }|D ]}|r@| dr@|d   S q1dS )z(Estrae l'URL immagine migliore dal tile.)Image	HeroImage
PromoImageBackgroundImageUrlCompetitionImagesr   )rT   
isinstancedict)rX   fieldimgcompimagesr   r   r    get_image_url   s   
r~   c                  C   sj   t d } |  r3z t| ddd}t|W  d   W S 1 s!w   Y  W dS  ty2   Y dS w dS )z,Carica l'EPG unito precedente come fallback.dazn_merged_epg.jsonr\   r	   r
   N)r_   r`   rg   rh   ri   	Exception)merged_pathrm   r   r   r    load_previous_merged   s   $r   c            (         s  t  } | sdS t }|sdS | dg }|dg }t }|r%|dg ng }tdt| dt| d tt	j
}g }|D ]V}zI|dd}	|d	d}
|	sTW qAt|	d
d}|
sg|tdd }n	t|
d
d}|tdd |  kr|tdd krn n|| W qA ttfy   Y qAw tdt|  t|t|kr|n|}dd |D }tdt|  g }t }|D ]}|dd }d|v rd|d< d|d< t||\}}|rd|d  v rd|d< d|d< |r|dd  |v r fdd|D }t||\}}|r|dd |r |vr|  |dg }d|dd v }t|dkr|s|d t||d dd|d dd|ddt|ddt|d	dt|d|ddd	}|| td |d!d"|d  d#|d  d$|d dd d%|d dd 
 nI|d t||dd|ddt|ddt|d	dt|d|ddd&}|| td |d!d"|d  d#|d  d'|dd  qd}|d}|rP|D ]W}|d|krN|d(}	|d)}
|	rN|
rNz0t|	}t|
}t|j}|td*d+ |  kr9|td*d+ kr@n n|}W  nW q tyM   Y qw q|rtd,| d-|d   ||d|d.|dd|dd|d(|d)|d/d0|ddd& qtd1|d!d"|dd d2 ||dd3d|dd|ddddd4|ddd& q|jd5d6 d7 t  d8t||d9}td: }t|d;d<d=} tj|| d>dd? W d   n	1 sw   Y  td@|  t dAdB  t dCt !dD  t dEt| dF t dB  t dEdGdHdIdJdKdLdMdNdEdOdKdP	 t dEdQ  |D ]}!|!dd3}"|!d(d}|!d)d}d}#d}$|ruzt|}%|%!dR}#W n tyt   |ddS }#Y nw |rzt|}%|%!dR}$W n ty   |ddS }$Y nw |!d/d}|!dd3}&|!d.rdTnd}'t dE|"dHdI|#dKdL|$dNdE|dUdE|& |'  q=t dB dA |S )VzV
    Pipeline principale: carica EPG + OCR, fa il matching, produce l'EPG finale.
    NTileseventsz[MERGER] EPG DAZN: z tiles, OCR:  eventiStartr   Endr   r      )hoursr7   z2[MERGER] Tiles attualmente in onda (con margine): c                 S   s"   g | ]}d | dpdvr|qS )z3000-r   r   rT   r&   tr   r   r    
<listcomp>%  s   " zmerge_epg.<locals>.<listcomp>z.[MERGER] Tiles dopo filtro canali permanenti: rQ   zzona serie a
ZonaSerieAchannelr   channel_numberrS   Idc                    s   g | ]}| d  kr|qS )r   r   r   tile_idr   r    r   @  s    Contestantsr4   rR   )	rQ   imager   channel2r   startendmatch_scorer1   z[MATCH] z.2fz	 | OCR: "z" -> EPG: "z" | CH: z, )rQ   r   r   r   r   r   r   r1   z" | r   r      )minutesz/[CACHE] Recuperato da scansione precedente per z: r   r   g      ?z[NO MATCH] score=z" - nessun fallback disponibile?r3   c                 S   s   |  ddS )Nr   r   r   )er   r   r    <lambda>  s    zmerge_epg.<locals>.<lambda>)keydazn_merged_epg)	timestampsourcetotal_eventsr   r   r;   r	   r
   F)ensure_asciiindentz[MERGER] EPG finale salvato: 
zF======================================================================z  DAZN EPG MERGED - z%d/%m/%Y %H:%Mz  z eventi associatiCHz<12r)   INIZIOz>5-FINEz<5SCOREz  EVENTOz@----------------------------------------------------------------z%H:%M   z [IMG]z>5.2f)"rn   rp   rT   r   rc   re   r9   r   r]   r   utcr   r   r   appendr   r   r@   r,   rY   addr~   r!   roundtzinfowarningsortr   r_   rg   rh   dumpprintr^   )(r   	live_datatiles
ocr_eventsprev_merged_dataprev_eventsr]   
live_tilesrX   	start_strend_strr   r   search_tilesmerged_events
used_tilesocr_evocr_title_lowerrV   rM   	remainingcontestantsis_events_channelevent_entryfallback_eventchanprev_evsten	now_localoutputoutput_pathrm   evchstart_hmend_hmdtrQ   has_imgr   r   r    	merge_epg   sl   ,

















0











*

6r   c                  C   sb   t d t d t d t d t d t d t } | r*tdt|  d d S td d S )Nr   z&  ====================================z    DAZN EPG Mergerz#    Associa eventi OCR all'EPG DAZNz[DONE] Merge completato: r   z[DONE] Merge fallito)r   r   rc   re   r9   rd   )resultr   r   r    main  s   r   __main__)1__doc__sysiorh   r-   r$   loggingr   r   r   zoneinfor   difflibr   pathlibr   r   hasattrstdoutr   stderr__file__parentBASE_DIRr_   LOG_FILEbasicConfigINFOFileHandlerStreamHandler	getLogger__name__rc   rU   strr!   r0   floatrN   ry   listtuplerY   rn   rp   r~   r   r   r   r   r   r   r    <module>   s`    	


3
  j
