from __future__ import annotations

import copy
import logging
import os
import pickle
import random
from contextlib import contextmanager
from functools import partial
from typing import Any, TYPE_CHECKING

from typing_extensions import ParamSpec, TypeVar

import sympy

import torch
import torch.fx as fx
import torch.nn as nn
import torch.utils._pytree as pytree
from torch import SymInt
from torch._decomp import get_decompositions
from torch.fx.experimental.symbolic_shapes import bind_symbols

from .aot_autograd import aot_function, aot_module, make_boxed_compiler
from .compile_utils import strip_overloads
from .partitioners import (
    default_partition,
    draw_graph,
    min_cut_rematerialization_partition,
)


if TYPE_CHECKING:
    from collections.abc import Callable, Generator, Sequence

    from torch.fx.node import Node
    from torch.types import IntLikeType


_P = ParamSpec("_P")
_R = TypeVar("_R")

log = logging.getLogger(__name__)


def _canonicalize(fx_g: fx.GraphModule) -> fx.GraphModule:
    # Rewrite aten._to_copy calls into aten.to so downstream compilers see a
    # canonical form.
    for node in fx_g.graph.find_nodes(
        op="call_function", target=torch.ops.aten._to_copy
    ):
        node.target = torch.ops.aten.to
    fx_g.recompile()
    return fx_g


@contextmanager
def _disable_jit_autocast() -> Generator[None, None, None]:
    # Temporarily turn off TorchScript autocast handling while scripting.
    old_jit_autocast_flag = torch._C._jit_set_autocast_mode(False)
    try:
        yield
    finally:
        torch._C._jit_set_autocast_mode(old_jit_autocast_flag)

dkr.d|j
v r.tjjj|_q| jjD ]}i }|j
 D ]\}}t|tjrI|j}|||< q<||_
q3| j  |   tj| }tj|j tj| }tj|}tdd |D s||  W d   |S W d   |S 1 sw   Y  |S )a  
    Compiles the :attr:`fx_g` with Torchscript compiler.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fx_g(fx.GraphModule): The input Fx graph module to be compiled.

    Returns:
        Torch scripted model.
    r   r   r   dtypec                 s  s    | ]
}t |tjjV  qd S N)
isinstancer$   _subclasses
FakeTensor).0tr+   r+   r,   	<genexpr>n   s    zts_compile.<locals>.<genexpr>N)r2   r   r"   r#   r$   r%   r&   r'   lenargskwargsr(   r!   nodesitemsr8   devicetypelintr)   jitscriptr/   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r   r3   r*   
new_kwargskvfr+   r+   r,   
ts_compileC   s<   

&




rQ   T_r   namestr
clear_metaboolc                 C  s   t | j t| ||d | S )N)rU   )printcoder   )r   rR   rS   rU   r+   r+   r,   _draw_graph_compiles   s   
rY   5Callable[[fx.GraphModule, list[Any]], fx.GraphModule]c                 C  s   t tt| dS )NrS   )r   r   rY   r[   r+   r+   r,   draw_graph_compile{   s   r\   c                 C  s   | S )z
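
# Usage sketch: ts_compile plugs into aot_function as a forward/backward
# compiler. `fn` and the input shape are hypothetical; functorch.compile is
# assumed to re-export both names.
#
#     import torch
#     from functorch.compile import aot_function, ts_compile
#
#     def fn(x):
#         return x.cos().sin()
#
#     fn_aot = aot_function(fn, fw_compiler=ts_compile)
#     out = fn_aot(torch.randn(4, 4))
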
@make_boxed_compiler
def nop(fx_g: fx.GraphModule, _: Sequence[Any]) -> fx.GraphModule:
    """
    Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
    and can be used to check accuracy.

    .. warning::
        This API is experimental and likely to change.

    """
    return fx_g


class DebugInterpreter(fx.Interpreter):
    def run(
        self,
        *args: Any,
        initial_env: dict[Node, Any] | None = None,
        enable_io_processing: bool = True,
    ) -> Any:
        self.symbol_mapping = bind_symbols(self.module, *args)
        return super().run(
            *args, initial_env=initial_env, enable_io_processing=enable_io_processing
        )

    def run_node(self, n: Node) -> Any:
        def subst_symint(ni: IntLikeType) -> int:
            # Substitute the bound symbol values into a SymInt and reduce it
            # to a concrete integer.
            if not isinstance(ni, SymInt):
                return ni
            r = sympy.expand(ni.node.expr.xreplace(self.symbol_mapping))
            if not r.is_number:
                raise AssertionError(f"expected {r} to be a number, got {type(r)}")
            return int(r)

        def subst_symint_tuple(nis: tuple[IntLikeType, ...]) -> tuple[int, ...]:
            return tuple(subst_symint(ni) for ni in nis)

        def check_significant_strides(a: torch.Tensor, b: torch.Tensor) -> bool:
            # Only strides of dimensions with more than one element matter.
            if subst_symint(a.numel()) > 0:
                for idx in range(a.ndim):
                    if (
                        subst_symint(a.stride(idx)) != b.stride(idx)
                        and subst_symint(a.size(idx)) > 1
                    ):
                        return False
            return True

        def check(nv: torch.Tensor, rv: torch.Tensor, desc: Callable[[], str]) -> None:
            if not callable(desc):
                raise AssertionError(f"expected desc to be callable, got {desc}")
            if nv.dtype != rv.dtype:
                raise AssertionError(f"{desc()}: {nv.dtype} != {rv.dtype}")
            if subst_symint_tuple(nv.size()) != rv.size():
                raise AssertionError(
                    f"{desc()}: {nv.size()} aka {subst_symint_tuple(nv.size())} "
                    f"!= {rv.size()}"
                )
            same_strides = check_significant_strides(nv, rv)
            if not same_strides:
                raise AssertionError(
                    f"{desc()}: {nv.stride()} aka {subst_symint_tuple(nv.stride())} "
                    f"!= {rv.stride()}"
                )

        r = super().run_node(n)
        if "val" in n.meta:
            # Compare the metadata recorded at trace time against the values
            # produced by actually running the node.
            n_vals, n_spec = pytree.tree_flatten(n.meta["val"])
            r_vals, r_spec = pytree.tree_flatten(r)
            if len(n_vals) != len(r_vals):
                raise AssertionError(f"{len(n_vals)} != {len(r_vals)}")
            for i, (nv, rv) in enumerate(zip(n_vals, r_vals)):
                if not isinstance(rv, torch.Tensor):
                    continue
                check(nv, rv, lambda: f"output {i} where {self.symbol_mapping}")
        return r

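
# Usage sketch: run an FX module under DebugInterpreter to validate recorded
# fake-tensor metadata against real execution (`gm` must carry "val" node
# metadata from tracing; `args` are hypothetical real tensors).
#
#     interp = DebugInterpreter(gm)
#     out = interp.run(*args)  # raises AssertionError on dtype/size/stride mismatch
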
@make_boxed_compiler
def debug_nop(
    fx_g: fx.GraphModule, _: Sequence[Any]
) -> Callable[[DebugInterpreter, Any, dict[Node, Any] | None, bool], Any]:
    """
    Returns a (slow) interpreter over the FX graph module that also checks
    various debugging properties (e.g., that tracing strides matched real
    strides.)
    """
    return DebugInterpreter(fx_g).run


@make_boxed_compiler
def simple_ts_compile(
    fx_g: fx.GraphModule, _: Sequence[Any]
) -> torch.jit.ScriptModule:
    strip_overloads(fx_g)
    f = torch.jit.script(fx_g)
    f = torch.jit.freeze(f.eval())
    return f


def nnc_jit(f: Callable[..., Any]) -> Callable[..., Any]:
    return aot_function(f, simple_ts_compile)


aten = torch.ops.aten
default_decompositions = {
    aten.detach,
    aten.gelu_backward,
    aten.leaky_relu_backward,
    aten.sigmoid_backward,
    aten.threshold_backward,
    aten.hardtanh_backward,
    aten.hardsigmoid_backward,
    aten.hardswish_backward,
    aten.tanh_backward,
    aten.silu_backward,
    aten.elu_backward,
    aten.cudnn_batch_norm,
    aten.cudnn_batch_norm_backward,
    aten.masked_fill.Scalar,
    aten.masked_fill.Tensor,
    aten.elu,
    aten.leaky_relu,
    aten.hardtanh,
    aten.hardswish,
    aten.hardsigmoid,
    aten.conj_physical,
    aten.is_same_size,
}

default_decompositions = get_decompositions(default_decompositions)


@make_boxed_compiler
def print_compile(fx_g: fx.GraphModule, _: Sequence[Any]) -> fx.GraphModule:
    print(fx_g.code)
    return fx_g

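
# Usage sketch: nnc_jit wraps a plain function with aot_function using the
# simple TorchScript pipeline above (`f` and the input are hypothetical).
#
#     def f(x):
#         return torch.sin(x) + x
#
#     f_jitted = nnc_jit(f)
#     y = f_jitted(torch.randn(8))
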
def memory_efficient_fusion(
    fn: Callable[_P, _R] | nn.Module,
    **kwargs: Any,
) -> Callable[_P, _R] | nn.Module:
    """
    Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
    memory efficient fusion. It uses the
    :func:`min_cut_rematerialization_partition` partitioner to perform efficient
    recomputation. It uses NVFuser to compile the generated forward and backward
    graphs.

    .. warning::
        This API is experimental and likely to change.

    Args:
        fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
            that takes one or more arguments. Must return one or more Tensors.
        **kwargs: Any other overrides you want to make to the settings

    Returns:
        Returns a ``Callable`` or ``nn.Module`` that retains the eager behavior
        of the original :attr:`fn`, but whose forward and backward graphs have
        gone through recomputation optimizations, and the graphs have been
        compiled with nvfuser.

    """
    config = {
        "fw_compiler": ts_compile,
        "bw_compiler": ts_compile,
        "partition_fn": min_cut_rematerialization_partition,
        "decompositions": default_decompositions,
    }
    config.update(kwargs)
    if isinstance(fn, torch.nn.Module):
        return aot_module(fn, **config)
    else:
        return aot_function(fn, **config)

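
# Usage sketch: fuse a module's forward and backward graphs (the model and
# input are hypothetical; a CUDA device and TorchScript-compatible graphs are
# assumed).
#
#     model = nn.Sequential(nn.Linear(16, 16), nn.GELU()).cuda()
#     fused = memory_efficient_fusion(model)
#     loss = fused(torch.randn(4, 16, device="cuda")).sum()
#     loss.backward()
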
def debug_compile(
    fx_g: fx.GraphModule, inps: Sequence[torch.Tensor]
) -> fx.GraphModule:
    fx_g.to_folder("foo")
    print(
        f"""
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = {[(i.shape, i.dtype) for i in inps]}
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
"""
    )
    # Sanity-check that the dumped module imports and instantiates on CUDA.
    from foo import FxModule

    FxModule().cuda()

    return ts_compile(fx_g, inps)


graph_index: int = 0

def get_inputs(input_data_path: str) -> list[torch.Tensor]:
    """
    Return a random input for the given inputs meta generated from _save_fx_default.
    """
    inputs = []
    with open(input_data_path, "rb") as f:
        inputs_meta = pickle.load(f)
        for meta in inputs_meta:
            if len(meta) == 1:
                # Scalar input: the meta is just (type,).
                (type_,) = meta
                input_ = type_(random.random())
            else:
                type_, shape, _stride, dtype, device = meta
                if dtype in {
                    torch.int,
                    torch.int32,
                    torch.int64,
                    torch.bool,
                    torch.uint8,
                    int,
                    float,
                }:
                    input_ = torch.randint(0, 1, shape, dtype=dtype, device=device)
                else:
                    input_ = torch.rand(shape, dtype=dtype, device=device)
            inputs.append(input_)
    return inputs

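
# Usage sketch: rebuild random inputs from a .input metadata file written by
# _save_fx_default (the path below is hypothetical).
#
#     inps = get_inputs("dump/mymodel/mymodel_forward_0/mymodel_forward_0.input")
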
def _save_fx_default(
    current_name: str,
    folder_name: str,
    dump_example_input: bool,
    gm: torch.fx.GraphModule,
    example_inputs: Any,
) -> nn.Module:
    """
    The forward, backward, and joint computation graph will be stored in
    {folder_name}/{current_name}/{current_name}_forward_{graph_index},
    {folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
    {folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
    The input shape of the graphs will be stored in the .input files.
    These files can be loaded with pickle,
    and is a list of format (type, shape, stride, dtype, device).
    In the case of type = int or float, it is just (type,).
    For joint graph input, it is a nested list [[],[]]
    where the two inner lists have the same format.
    If dump_example_input is True, example_inputs will be stored in .pt file.
    Since each function might produce multiple graphs,
    the graph_index is used to distinguish different graphs.
    """
    from functorch.compile import aot_module_simplified

    def get_input_meta(args: Any) -> list[Any]:
        input_meta = []
        if len(args) > 0 and isinstance(args[0], tuple):  # joint input
            input_meta += get_input_meta(args[0])
            input_meta += get_input_meta(args[1])
            return input_meta
        for arg in args:
            if type(arg) is int or type(arg) is float:
                input_meta.append((type(arg),))
            else:
                input_meta.append(
                    (type(arg), arg.shape, arg.stride(), arg.dtype, arg.device)
                )
        return input_meta

    def graph_saver_helper(
        gm_to_save: fx.GraphModule, args: Any, type_name: str
    ) -> None:
        if len(gm_to_save.graph.nodes) == 0:
            log.log(
                logging.WARNING,
                "No nodes in graph {%s}_{%s}_{%s}.",
                current_name,
                type_name,
                graph_index,
            )
            return

        gm = copy.deepcopy(gm_to_save)
        gm.graph.set_codegen(torch.fx.graph.CodeGen())  # remove codegen
        gm.recompile()

        input_meta = get_input_meta(args)

        os.makedirs(f"{folder_name}/{current_name}", exist_ok=True)
        gm.to_folder(
            f"{folder_name}/{current_name}/{current_name}_{type_name}_{graph_index}"
        )
        with open(
            f"{folder_name}/{current_name}/{current_name}_{type_name}_{graph_index}/"
            f"{current_name}_{type_name}_{graph_index}.input",
            "wb",
        ) as f:
            pickle.dump(input_meta, f)
        if dump_example_input:
            torch.save(
                args,
                f"{folder_name}/{current_name}/{current_name}_{type_name}_{graph_index}/"
                f"{current_name}_{type_name}_{graph_index}.pt",
            )

    def graph_saver_forward(gm: fx.GraphModule, fw_args: Any) -> fx.GraphModule:
        graph_saver_helper(gm, fw_args, "forward")
        return gm

    def graph_saver_backward(gm: fx.GraphModule, bw_args: Any) -> fx.GraphModule:
        graph_saver_helper(gm, bw_args, "backward")
        global graph_index
        graph_index += 1
        return gm

    def graph_saver_joint(
        gm: fx.GraphModule, joint_args: Any
    ) -> tuple[fx.GraphModule, fx.GraphModule]:
        graph_saver_helper(gm, joint_args, "joint")
        return default_partition(gm, joint_args)

    return aot_module_simplified(
        gm,
        example_inputs,
        fw_compiler=graph_saver_forward,
        bw_compiler=graph_saver_backward,
        partition_fn=graph_saver_joint,
        decompositions=default_decompositions,
    )

def graph_dumper_aot(
    current_name: str, folder_name: str, dump_example_input: bool = False
) -> Callable[[bool, nn.Module], Any]:
    """
    Dump the forward, backward, and joint computation graph.
    Example Usage:
    save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
    optimize_ctx = torchdynamo.optimize(
        save_fx_func
    )
    with torch.enable_grad():
        with optimize_ctx:
            result = forward_and_backward_pass(model, example_inputs)
    """
    global graph_index
    graph_index = 0
    return partial(_save_fx_default, current_name, folder_name, dump_example_input)

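
# Usage sketch with the torch._dynamo entry point (`MyModel` and `inp` are
# hypothetical; this mirrors the torchdynamo example in the docstring above).
#
#     import torch._dynamo as dynamo
#
#     save_fx_func = graph_dumper_aot("mymodel", "dump", dump_example_input=False)
#     compiled = dynamo.optimize(save_fx_func)(MyModel().cuda())
#     compiled(inp).sum().backward()
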