o
    eix                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlZd dlmZmZmZ d dlmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 ddl:m;Z;m<Z< ddl<m=Z= ddl>m?Z?m@Z@mAZAmBZBmCZC ddlDmEZE eFeGZHdaIeeJeKeLeMf   eNd< daOePeNd< daQeeKeReeL f  eNd< e(eGdZSe(eGdZTeJe ZUe Vddd gZWg d!ZXejYd"ePfd#d$ZZ		dxd%eJe? d&ePd'eeL d"dfd(d)Z[d*eJe? d"e j\fd+d,Z]		 dyd%eeU d-eKeLeLf d.eeL d/eRd"df
d0d1Z^d-eKeLeLf d"eKeLeWf fd2d3Z_d4ej j.d*eUd"dfd5d6Z`ejad"ed fd7d8Zbi aceKeLeKeLeJeL f f eNd9< i adeKeLeJeL f eNd:< daeeeR eNd;< i afeKeLeLf eNd<< i ageKeLeJeL f eNd=< d aheReNd>< dzd?d@Ziejad"ed fdAdBZjG dCdD dDZkG dEdF dFZld%eUd"dfdGdHZmd%eUd"dfdIdJZndKeJeeLdf  d"dfdLdMZod%ee? d"dfdNdOZpdPeeqeerf  d"dfdQdRZsdzdSdTZtejad"ed fdUdVZuejvG dWdX dXZwex ZydYeeR dZeKeLef d"eKeLeKeLeJeL f f fd[d\Zzd]eKeLef d"eKeLeKeLef f fd^d_Z{d"eKeLef fd`daZ|d"eKeLeKeLeJeL f f fdbdcZ}	d{ddeee? e=f deeLdfePd"eeR fdgdhZ~diedjed"dfdkdlZdmeLd"efdndoZddpdqedreLf dsejjdteKeLef duee5 d"eLf
dvdwZdS )|    N)CallableIteratorSequence)AnyIOOptionalUnion)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)utils)getArtifactLogger)trace_structured)signpost_event)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)FileLike)
OrderedSet)tree_map   )configir)ExternKernel)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VGRAPH_EXECUTION_ORDERFRECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSir_pre_fusionir_post_fusionBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                   C   s   t dd uS )Nr-   )shutilwhich r1   r1   _/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/_inductor/debug.pyhas_dot?   s   r3   nodesprint_graphfnamec           	   	   C   s   t  s
td dS |du rt }t| }|jD ]?}d|jvr q|jd j}t|t	r<t|d t
r8|d f}n|d }d}t|tjrH|jj}t||ddddd}||jd< q|r^t| ti |}t| |j  t||dtjjd dS )z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   tensor_metaF)
clear_metadot_graph_shape)r3   logwarningr   create_fx_from_snodesr4   metagroup
isinstancetupleintr   ComputedBufferdatadtyper   printr   r   graphlintr
   r   tracer:   )	r4   r5   r6   rG   noder?   rE   metadatagmr1   r1   r2   draw_buffersD   s6   






rM   snodesc              
      sf  dt dtdtf fdd}tdg d}i }i }tj }d}g }d}| D ]}	|	 r1d	}
|
}n-|		 r:d
}
|
}n$t
|	trDd}
|
}nt
|	trOd}
|	j}nt
|	trZd}
|	j}ntdtjj|	 d}|
 d| }||}i }t|	drd|	 i}|j|d|d}dtttf dtf fdd  |	r|| |	 }||_|||	|
|jd< |||< |	 D ]}||| < q|du r|}q&| D ]Q}	|	 }|	jj }|| }g }|D ]9}|j|v r||j }n!|!| |"|j}|||j< W d   n	1 sw   Y  ||krq|| qt#||_$q|%t&|dkr,|d  |S t#| |S )B
    Creates a FX Graph from a list of SchedulerNode objects.
    r+   r.   .c                 S   s   dt dtfdd}| |_|S )Nargsr.   c                  W   s   dS Nr   r1   )rP   r1   r1   r2   func1y      z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1)r   rB   __name__)r+   rR   r1   r1   r2   get_fake_funcx   s   z,create_fx_from_snodes.<locals>.get_fake_func
FusionMeta)r?   snodetypeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer1   rP   kwargsrW   c                    s8   t | trt fdd| jD S tdd |  D S )Nc                 3   s    | ]} |V  qd S Nr1   ).0x	in_outputr1   r2   	<genexpr>   s    z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>c                 s   s(    | ]}|j D ]	}t|jtV  qqd S rc   )usersr@   rJ   r"   )rd   bufuserr1   r1   r2   rh      s    
)r@   r    anyrN   get_outputs)rW   rf   r1   r2   rg      s
   
z(create_fx_from_snodes.<locals>.in_outputr7   r   r   )'strr   rB   collections
namedtupletorchr   Graph	is_externis_templater@   r!   r#   r?   r    RuntimeError	_inductorr   get_fused_kernel_name	get_nodeshasattrr_   call_functionr   r   boolappendget_namer+   r>   rm   read_writesreadsinserting_beforeplaceholderrA   rP   outputlen)rN   rU   rV   buf_to_fx_nodenode_to_fx_noderG   
first_nodeoutputsr?   rW   	node_type
fused_name	func_name	node_funcrb   fx_noder+   rj   depsnew_argsdepdep_noder1   rf   r2   r=   s   s   




	



r=   node_name_to_buf_nameparent_buf_name	n_originsc           
      C   s   | d u rd S | D ]S}|  }| }|d ur)t|dkr)t|||d u r%|n| qt|dkr5|d |ks7J |j}|d u sC|jd u rDq|jD ]}|j}	|	|vrZ|d u rV|n|||	< qGqd S )Nr   r   )r}   rx   r   $update_orig_fx_node_name_to_buf_namerJ   originsr+   )
r4   r   r   r   rJ   buf_namechildren_nodesir_nodeorigin	node_namer1   r1   r2   r      s0   
r   c                 C   sp   i }|   D ]\}}||vrt|g||< q|| | qi }|   D ]\}}t|| }t||||< q$|S rc   )itemsr   addr   r*   )r   buf_name_to_n_noder   r   node_name_to_buf_metan_noder1   r1   r2   get_node_name_to_buf_meta   s   r   rL   c                 C   sP   i }t || |du rdS t|}| jjD ]}|j|v r%||j|jd< qdS )rO   Nbuf_meta)r   r   rG   r4   r+   getr>   )rL   rN   r   r   rJ   r1   r1   r2   annotate_orig_fx_with_snodes  s   

r   c               	   c   s    t jdddk} dd l}t|jjj}t	
 }| s,z
d V  W |  d S |  w |tdd t jt d}t j|dd tt j|d	t  d
}|tj |td || zd V  W || |  d S || |  w )NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradrT   
contextlib	ExitStackcloseenter_contextr	   pathjoinr   makedirsFileHandlerr   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugrq   r;   stackr   fhr1   r1   r2   enable_aot_logging  s>   




r    _inductor_post_to_pre_grad_nodes._inductor_triton_kernel_to_post_grad_node_info_pre_grad_graph_id#_inductor_pre_grad_node_stack_trace_inductor_kernel_stack_trace(_inductor_kernel_provenance_debug_handlec                   C   s   da d S rQ   )r   r1   r1   r1   r2   -reset_inductor_kernel_provenance_debug_handleQ  s   r   c                  c   s    t } t }t }t }t }t}da i ai ai ai adazdV  W | a |a|a|a|a|adS | a |a|a|a|a|aw )zzContext manager that resets provenance tracking globals upon entering
    and restores their original values when exiting.r   N)r   r   copyr   r   r   r   )original_pre_grad_graph_idoriginal_post_to_pre_grad_nodes-original_triton_kernel_to_post_grad_node_info+original_inductor_pre_grad_node_stack_trace$original_inductor_kernel_stack_trace0original_inductor_kernel_provenance_debug_handler1   r1   r2   reset_provenance_globalsV  sH   r   c                   @   s*  e Zd Ze Zededee fddZ	d)ddZ
deddfd	d
Z	d*dededededee f
ddZej	d*dededededeee  f
ddZdedefddZd)ddZd)ddZdededdfddZdeee  dee d ee ddfd!d"Zd)d#d$Zd%edeed&  fd'd(ZdS )+DebugContextfolder_namer.   c                 C   sV   t jjpt }tjD ]}tj|d|  d| }tj	|s(t
| |  S q
d S )Nr   .)r   rI   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnamer1   r1   r2   create_debug_dir  s   

zDebugContext.create_debug_dirNc                 C   s   d | _ d | _t | _d S rc   )_prof_pathr   r   _stack)selfr1   r1   r2   __init__  s   zDebugContext.__init__new_pathc                 C   s   | j sd S |dsJ |ddlm} z.|| d tj|r(t| t	| j | W d    W d S 1 s;w   Y  W d S  t
yT   td| j | Y d S w )Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   r/   rmtreecopytreeOSErrorr;   r<   )r   r   r   r1   r1   r2   r     s   
&
zDebugContext.copywfilename
write_moderP   rb   c                 O   s.   | j sJ ttj| j ||g|R i |S rc   r   openr   r   r   )r   r   r   rP   rb   r1   r1   r2   fopen  s   
$zDebugContext.fopenc                 o   s\    | j sJ ttj| j ||g|R i |}|V  W d    d S 1 s'w   Y  d S rc   r   )r   r   r   rP   rb   fr1   r1   r2   fopen_context  s
   
&"zDebugContext.fopen_contextsuffixc                 C   s   | j sJ tj| j |S rc   )r   r   r   r   )r   r   r1   r1   r2   r     s   
zDebugContext.filenamec                 C   s   t jjd urJdd l}| jsJ tj| jtj| j d}|	|d}|j
| jtj| jd W d    n1 s=w   Y  t j| d S d S )Nr   z.tar.gzzw:gz)arcname)r   rI   
upload_tartarfiler   r   r   r   basenamer   r   )r   r   tar_filetarr1   r1   r2   r     s   
zDebugContext.upload_tarc                    s   t jr#td  j} tj dtdd f fdd}| j	|| | j
t|  t jjs2d S | t | _t jjrD| dtj t jjrQ| dtj d S d S )Nztorch._dynamolevelr.   c                    s     |  d S rc   )r   )r   r;   r1   r2   reset_log_level  s   z/DebugContext.__enter__.<locals>.reset_log_levelz	debug.logzinfo.log)r   debugr   r   r   r   r   r   r   callbackr   r$   set_debug_handlerrI   enabledr   r   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr   r1   r   r2   	__enter__  s   
zDebugContext.__enter__r   c                 C   sp   t d}| j| |}t |}|| |t d |	| |t
|j| | j|j| d S )Nztorch._inductorr   )r   r   r   r   r   StreamHandlerr   r   r   r   minr   r   r   )r   r   r   r;   fdchr1   r1   r2   r     s   



zDebugContext._setup_log_captureexc_typeexc_valexc_tbc                 C   sF   | j r| j   |   | jr|   tdt | j | j	  d S )Nz%s debug trace: %s)
r   disable_save_profile_datar   r   r;   r<   r   r   r   )r   r	  r
  r  r1   r1   r2   __exit__  s   
zDebugContext.__exit__c                 C   s   | j sJ | j | d | d)}tj| j |d}|  |d |d |d |d W d    d S 1 s?w   Y  d S )Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r   pstatsStats
strip_dirs
sort_statsprint_stats)r   r  statsr1   r1   r2   r    s   



"zDebugContext._save_profile_datar+   ).Nc                 C   sb   t jjr$tt j|r$ztt| |W S  ty#   tjddd Y d S w dtdtdd fdd}|S )	Nz Ignoring exception in debug codeTexc_inforP   rb   r.   c                  _   s   d S rc   r1   ra   r1   r1   r2   ignored  rS   z)DebugContext.__getattr__.<locals>.ignored)	r   rI   r   getattrDebugFormatter	Exceptionr;   r<   r   )r   r+   r  r1   r1   r2   __getattr__  s   zDebugContext.__getattr__r.   N)r   )rT   
__module____qualname__	itertoolscountr   staticmethodrn   r   r   r   r   r   r   r   r   contextmanagerr   r   r   r   r  rB   r   rX   BaseExceptionr  r  r   r   r1   r1   r1   r2   r     sh    









r   c                   @   s  e Zd ZdeddfddZdejjdeej	 ddfdd	Z
dejjdeej	 ddfd
dZdeddfddZdeddfddZededefddZdeddfddZdejjdeddfddZd%dededdfddZdedeej dedef d ed!ed"ee ddfd#d$ZdS )&r  handlerr.   Nc                 C   s"   |j | _ |j| _|j| _|| _d S rc   )r   r   r   r)  )r   r)  r1   r1   r2   r   &  s   
zDebugFormatter.__init__rL   inputsc              
   C   s   |  dH}d }tjjjjrtjj|}t	j
|j}tjjjj}tjjddd t|||d||d W d    n1 sAw   Y  W d    n1 sPw   Y  |  d}||jdd W d    d S 1 sow   Y  d S )Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r   rq   rv   r   rI   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r+   r	   r   writeprint_readable)r   rL   r*  r  r,  r-  r1   r1   r2   fx_graph,  s.   "zDebugFormatter.fx_graphc                 C   sB   |  d}||jdd W d    d S 1 sw   Y  d S )Nzfx_graph_transformed.pyFr.  )r   r4  r5  )r   rL   r*  r  r1   r1   r2   fx_graph_transformedJ  s   "z#DebugFormatter.fx_graph_transformedr4   c                 C   @   |  d}|| | W d    d S 1 sw   Y  d S )Nzir_pre_fusion.txtr   r4  	_write_irr   r4   r  r1   r1   r2   r(   R     "zDebugFormatter.ir_pre_fusionc                 C   r8  )Nzir_post_fusion.txtr9  r;  r1   r1   r2   r)   V  r<  zDebugFormatter.ir_post_fusionc                 C   s2   t  }| D ]}||  |d q| S )Nz


)ioStringIOr4  	debug_strgetvalue)r4   rj   rJ   r1   r1   r2   r:  Z  s
   zDebugFormatter._write_irc                 C   s   t || dd d S )Nzgraph_diagram.svg)r6   )rM   r   )r   r4   r1   r1   r2   graph_diagramb  s   zDebugFormatter.graph_diagramc                 C   s,   t || t|| ddtdtjjd d S )Nzorig_fx_graph_diagram.svgFT)r6   r9   progparse_stack_tracer:   )r   r
   r   GRAPHVIZ_COMMAND_SCALABLEr   rI   r:   )r   rL   r4   r1   r1   r2   draw_orig_fx_graphe  s   

z!DebugFormatter.draw_orig_fx_graphpyr   	extensionc                 C   s   t || d|  d S )Nzoutput_code.)r/   r   r   )r   r   rG  r1   r1   r2   output_codet  s   zDebugFormatter.output_coder+   input_nodestimingsChoiceCallerelapseprecompile_elapseprescreening_elapsec                    s   ddl m  dt jdtttf f fdd|tj tj fdd|D |||d	}| j	d
ddd,}|
 D ]\}	}
t|	 }|| |
|d< t|| |d q;W d    d S 1 sew   Y  d S )Nr   )FixedLayoutrJ   r.   c              	      s  t | dr	| j}nd}|t| jd}z7|  }t| rC |j|jtj	j
|jtj	j
|jtj	j
j|jddd}t||d< nt||d< W n	 tyS   Y nw z
t|  |d< W n	 tyg   Y nw z
t|  |d	< W n	 ty{   Y nw zttj	j
|  |d
< W n	 ty   Y nw zttj	j
|  |d< W n	 ty   Y nw zttj	j
|  |d< W n	 ty   Y nw t | drt| jtjrڈ| j|d< |S )Nr+    )r+   rX   r   )fallback)rE   sizestrideoffsetlayoutrE   r`   rS  rR  numelrD   )ry   r+   rX   rT   get_output_specr@   r`   rE   r$   rG   sizevarsoptimization_hintsrR  rS  optimization_hintrT  rn   r  	get_dtyper_   
get_strideget_size	get_numelrD   r   IRNode)rJ   r   	node_inforU  static_layoutrO  build_node_infor1   r2   rc    st   

	z>DebugFormatter.log_autotuning_results.<locals>.build_node_infoc                    s   g | ]} |qS r1   r1   rd   rJ   )rc  r1   r2   
<listcomp>      z9DebugFormatter.log_autotuning_results.<locals>.<listcomp>)op_namecuda_device_namecuda_device_countrI  autotuning_timeprecompile_timeprescreening_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)r   rO  r_  dictrn   rq   cudaget_device_namedevice_countr   r   	info_dictupdatejsondumpr4  )r   r+   rI  rJ  rL  rM  rN  general_propertiesr  callertimeru  r1   rb  r2   log_autotuning_resultsw  s,   	"9	
"z%DebugFormatter.log_autotuning_results)rF  )rT   r"  r#  r   r   rq   r   r   listTensorr6  r7  SchedulerNodeListr(   r)   r&  rn   r:  rA  rE  rH  r   r_  rq  floatr   r|  r1   r1   r1   r2   r  %  sX    



r  c                 C   .   t tjrt dt|  tj	|  d S )NzBEFORE FUSION
%s)
ir_pre_fusion_logisEnabledForr   r  infor  r:  r$   r   r(   r4   r1   r1   r2   log_ir_pre_fusion     r  c                 C   r  )NzAFTER FUSION
%s)
ir_post_fusion_logr  r   r  r  r  r:  r$   r   r)   r  r1   r1   r2   log_ir_post_fusion  r  r  schedulec                    sD   zt ddd  fddd W d S  ty!   tjddd Y d S w )	Nartifactc                   S   
   dddS )Ninductor_collective_schedulerw  r+   rn  r1   r1   r1   r1   r2   <lambda>     z+_dump_collective_schedule.<locals>.<lambda>c                      s    S rc   r1   r1   r  r1   r2   r    s    metadata_fn
payload_fnzAFailed to log inductor_collective_schedule via structured loggingTr  )r   r  r;   r   r  r1   r  r2   _dump_collective_schedule  s   
r  c                    s&    fdd| D }|rt | d S d S )Nc                    s0   g | ]}t t|d d  tjrt ddqS )rJ   Npython_kernel_name)r@   r  r   _CollectiveKernelrd  opr1   r2   re    s    
z+log_collective_schedule.<locals>.<listcomp>)r  )r4   r  r1   r  r2   log_collective_schedule  s   
r  node_runtimesc              	      sJ  zt jjjdttt  dtt ffdd}dtdtt fdd}g  | D ]]\}}t	|j
d| }t|j
r;d	nd
}g }z0| D ])}|j
}	|	 }
t|	jtjrX|	 nd}|	 }|||
||||d qDW n	 tyx   Y nw  ||||d q&tddd  fddd W dS  ty   tjddd Y dS w )zDLog per-op runtime estimates and output tensor metadata for TLParse.re   r.   c                    s   | d ur
t  | S g S rc   )r}  re   )to_optimization_hintsr1   r2   to_list  s   z,log_runtime_and_tensor_meta.<locals>.to_listrE   c                 S   s"   | d u rd S t | }|d}|S )Nztorch.)rn   removeprefix)rE   sr1   r1   r2   dtype_to_str  s
   
z1log_runtime_and_tensor_meta.<locals>.dtype_to_strr  
collectiver\   N)shaperS  rE   )r+   rX   estimated_runtime_nsr   r  c                   S   r  )N inductor_runtime_and_tensor_metarw  r  r1   r1   r1   r1   r2   r  /  r  z-log_runtime_and_tensor_meta.<locals>.<lambda>c                      s   d iS )Nopsr1   r1   )r  r1   r2   r  3      r  z.Failed to log inductor_runtime_and_tensor_metaTr  )r$   rG   rX  rY  r   r   r   r}  rn   r  rJ   r}   r   is_collectiverm   maybe_get_sizer@   rU  r   Layoutr\  maybe_get_dtyper|   r  r   r;   r   )r  r  r  r  
runtime_nsr+   op_typer   rj   irnoder  rS  rE   r1   )r  r  r2   log_runtime_and_tensor_meta  sV   
"
	
r  c                   C   sH   t sdS ztddd dd d W dS  ty#   tjddd	 Y dS w )
z:Emit a structured artifact with the graph execution order.Nr  c                   S   r  )Ngraph_executionrw  r  r1   r1   r1   r1   r2   r  @  r  z%log_graph_execution.<locals>.<lambda>c                   S   s   dt iS )Ngraph_execution_order)r%   r1   r1   r1   r2   r  D  r  r  zFailed to log graph_executionTr  )r%   r   r  r;   r   r1   r1   r1   r2   log_graph_execution9  s   r  c                   c   sB    g a i adazdV  W t  dada dadS t  dada daw )z5Record graph execution order and log it once on exit.TNF)r%   r'   r&   r  r1   r1   r1   r2   $record_and_log_graph_execution_orderJ  s   r  c                   @   s    e Zd ZU eed< ejed< dS )TensorMetadataHoldertensor_metadatar`   N)rT   r"  r#  r   __annotations__rq   r`   r1   r1   r1   r2   r  Z  s   
 r  pre_grad_graph_idpost_to_pre_grad_nodes_jsonc              
      s  i i d}t |tstd |S t | ts|S tt}tt}zdttt	f dt
fdd}| D ]\ }t |tsGtd |  W S |D ]q}||sV|    W S |d| kro||d	    |  |d	   fd
d|dg D }|r| \}	||	s|    W S |	d| kr||	d	   | |	d	  |fdd|	dg D  |s~qIq4dttt	f ddfdd}
|
| |
| ||dW S  ty } z"tdddt|t d td| td|  |W  Y d}~S d}~ww )zx
    Create bidirectional mappings between pre_grad graph nodes
    and post_grad graph code nodes, and vice versa.
    )	preToPost	postToPrezCProvenance tacking error: post_to_pre_grad_nodes_json is not a dictrJ   r.   c                 S   sB   t | tstd dS d| vsd| vsd| vrtd dS dS )NzVProvenance tacking error: node provenance in post_to_pre_grad_nodes_json is not a dictFgraph_idr+   	from_nodezYProvenance tacking error: node provenance in post_to_pre_grad_nodes_json has wrong formatT)r@   rq  r;   error)rJ   r1   r1   r2   check_format  s   
z8create_mapping_pre_post_grad_nodes.<locals>.check_formatzIProvenance tacking error: post_to_pre_grad_nodes_json value is not a listr  r+   c                    s   g | ]}| fqS r1   r1   rd   r   )	outer_keyr1   r2   re    rf  z6create_mapping_pre_post_grad_nodes.<locals>.<listcomp>r  c                 3   s    | ]}| fV  qd S rc   r1   r  )
parent_keyr1   r2   rh     s    
z5create_mapping_pre_post_grad_nodes.<locals>.<genexpr>dNc                 S   &   | D ]
}t | | | |< qt| } d S rc   r}  rq  r  keyr1   r1   r2   convert_sets_to_lists     zAcreate_mapping_pre_post_grad_nodes.<locals>.convert_sets_to_listsr+  provenance_tracking_error"create_mapping_pre_post_grad_nodesfunction	error_msgstack_tracez post_to_pre_grad_nodes_json:  %szpre_grad_graph_id:  %s)r@   rq  r;   r  rB   ro   defaultdictr   rn   r   r{   r   r}  r   r   popextendr  r   	traceback
format_exc)r  r  empty_returnpre_to_postpost_to_prer  
node_arrayrJ   r   current_noder  er1   )r  r  r2   r  c  st   








	r  triton_kernel_to_post_grad_jsonc              
   C   s   i i d}t | tstd |S tt}z;|  D ]\}}t |ts.td |  W S |D ]	}|| 	| q0qdtt
tf ddfdd}|| | |dW S  tyy } ztd	d
dt
|t d td|  |W  Y d}~S d}~ww )zqCreate bidirectional mappings between triton kernel name and post_grad
    graph code nodes, and vice versa.
    )cppCodeToPostpostToCppCodezGProvenance tacking error: triton_kernel_to_post_grad_json is not a dictzMProvenance tacking error: triton_kernel_to_post_grad_json value is not a listr  r.   Nc                 S   r  rc   r  r  r1   r1   r2   r    r  zFcreate_node_mapping_kernel_to_post_grad.<locals>.convert_sets_to_listsr+  r  "create_mapping_kernel_to_post_gradr  z$triton_kernel_to_post_grad_json:  %s)r@   rq  r;   r  ro   r  r   r   r}  r   rn   r   r  r   r  r  )r  r  post_to_cpp_coder  r  	curr_noder  r  r1   r1   r2   'create_node_mapping_kernel_to_post_grad  sN   	



	r  c               
   C   s   z6i } t r0tt}i t|} tjjr0tj	dd}t
| | W d    n1 s+w   Y  d| d< | W S  tyX } ztdddt|t d i W  Y d }~S d }~ww )	Nz/inductor_provenance_tracking_node_mappings.jsonr   g       @versionr+  r  dump_inductor_provenance_infor  )r   r  r   r   r   rI   r   r$   r   r   rw  rx  r  r   rn   r  r  )node_mappingnode_mapping_kernelr  r  r1   r1   r2   r     s@   	r  c               
   C   s   zAt di } tt tt B }i }|D ]'}t|g }t }|D ]}|| |g  q$t|g |t|d||< q|W S  tyc } zt	dddt
|t d i W  Y d}~S d}~ww )zCreate kernel information JSONr  )stack_tracespost_grad_nodespre_grad_nodesr+  r  create_kernel_information_jsonr  N)r   r   r   r   keysr   rv  r}  r  r   rn   r  r  )r  all_kernelsresultkernel_namer  r  	post_noder  r1   r1   r2   r  &  s<   
	r  node_scheduler  rs   c           
   
      sr  t jjdkrdS zddlm}m} td7 ag }| dt }|rRt| ts'J t	
|g  | jr>| jj}| vr= | n  fdd| jD  t|  }n:t| tsYJ t }| D ])}|||fvr|jdurt	
|g  ||j    fdd|jjD  q^t|}t
|g | tW S  ty }	 ztd	d
dt|	t d W Y d}	~	dS d}	~	ww )z
    Set the mapping between `kernel_name` and the post_grad nodes in `node_schedule`.

    Returns a unique int debug handler for each call to this function.
    r   Nr   )DisableReductionEnableReduction:c                 3        | ]}|j  vr|j V  qd S rc   r+   rd   r   curr_node_infor1   r2   rh   u  s    
z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>c                 3   r  rc   r  r  r  r1   r2   rh     s    
r+  r  'set_kernel_post_grad_provenance_tracingr  )r   rI   provenance_tracking_levelcodegen.simd_kernel_featuresr  r  r   r@   r   r   
setdefaultorigin_noder+   r|   r  r   r}  get_stack_tracesr   rJ   rv  r   r  r   rn   r  r  )
r  r  rs   r  r  r  origin_node_namestack_traces_setrW   r  r1   r  r2   r  P  sd   

	r  rP   rb   c            
      O   s   t jt d}t j|st | dtdtfdd}t|| |f\}}d}| d| dt	t
 d	}t|d
}t||f| W d   n1 sMw   Y  ttjrgd| d|d}	t|	 dS dS )z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    inductor_saved_argsre   r.   c                 S   s    t | tjrtt| | jS | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )r@   rq   r~  r  r   r`   r  r1   r1   r2   handle_tensor  s   z5save_args_for_compile_fx_inner.<locals>.handle_tensorcompile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   tempfile
gettempdirr   mkdirr   r   nextsave_args_cntr   picklerx  r;   r  r   r   rF   )
rP   rb   folderr   args_to_savekwargs_to_savefn_namer   r   messager1   r1   r2   save_args_for_compile_fx_inner  s$   
r  r   c              	   C   s   ddl m} t| d}t|\}}W d    n1 sw   Y  dtdtfdd}tjjdd	}|6 t	
d
d t|||f\}}||i |W  d    W  d    S 1 s]w   Y  W d    d S 1 smw   Y  d S )Nr   )r  rbre   r.   c                 S   s0   t | trtjj| jj| jj| jj	| j
S | S rc   )r@   r  rq   _dynamotestingrand_stridedr  r  rS  rE   r`   r  r1   r1   r2   r     s   
z9load_args_and_run_compile_fx_inner.<locals>.handle_tensorT)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxr  r   r
  loadr   rq   r1  FakeTensorModer   r	   r   )r   r  r   rP   rb   r   	fake_moder1   r1   r2   "load_args_and_run_compile_fx_inner  s   Rr  )package_pathfunc.exported_programinductor_configsr  c             
   C   s  ddl m} ddlm} ddlm} ddlm} |jj	}|j
dd}	t|	tjjs+J |j\}
}z[|r@|jjdkr@||d	|d
 |r|jjdkrt|	}t|j}t|}|||d |d |d
\}}t|}tjj||dd}| |j
dd|||ddd | |	|
||||dW S  |y } z||dd|d td |d }~w ty } z|rd}|jjdkrd}||d	||d |d }~ww )Nr   )AccuracyError)dump_to_minify)r   )_aoti_flatten_inputsF)check_guards   aot_inductor)options   r   )strictTaccuracy)r  r  load_and_runcheck_accuracy)r  r  r*  aot_inductor_accuracyminify)commandr&  zAccuracy failedrun)torch._dynamo.debug_utilsr   torch._dynamo.repro.aotir!  torch._inductorr   r  r"  r%  dump_aoti_minifiermoduler@   rq   r   r   example_inputsrepro_levelr   deepcopyrA   exportr;   r<   r  )r  r  r  r  r   r!  r   r"  use_minifierrL   rP   rb   gm_copyexample_inputs_copyconfig_copyflat_example_inputstuple_inputsflattened_epr  r.  r1   r1   r2   aot_inductor_minifier_wrapper  s   




	
r@  )FNrQ   r!  )F)ro   r   r   dataclasses	functoolsr=  r$  rw  r   r   os.pathr
  r  r/   r  r  collections.abcr   r   r   typingr   r   r   r   unittest.mockr	   rq   functorch.compiler
   r   r   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   r2  r   torch._loggingr   torch._logging._internalr   torch._utils_internalr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.typesr   torch.utils._ordered_setr   torch.utils._pytreer   rP  r   r   r   	schedulerr   r    r!   r"   r#   virtualizedr$   r   rT   r;   r%   r}  rq  rn   objectr  r&   r{   r'   rB   r  r  r  rp   r*   rD  cacher3   rM   rr   r=   r   r   r   r'  r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  rA   r  r  r  r  	dataclassr  r%  r	  r  r  r  r  r  r  r  r8  ExportedProgramr@  r1   r1   r1   r2   <module>   s,  
 



/f

$



 +
2  *
@

a

<"&-
P.

