from __future__ import annotations

import typing
from collections.abc import Callable
from typing import Optional, overload, TYPE_CHECKING, TypeAlias, Union
from typing_extensions import ParamSpec, Self, TypeVar

import torch
from torch import Tensor

from ._utils import _dummy_type


if TYPE_CHECKING:
    from torch.xpu import _POOL_HANDLE

__all__ = [
    "is_current_stream_capturing",
    "graph_pool_handle",
    "XPUGraph",
    "graph",
    "make_graphed_callables",
]

_R = TypeVar("_R")
_P = ParamSpec("_P")

if not hasattr(torch._C, "_XpuStreamBase"):
    # Define dummy placeholders so this module can be imported on builds
    # compiled without XPU support.
    torch._C.__dict__["_XPUGraph"] = _dummy_type("_XPUGraph")
    torch._C.__dict__["_xpu_graph_pool_handle"] = _dummy_type("_xpu_graph_pool_handle")
    torch._C.__dict__["_xpu_isCurrentStreamCapturing"] = _dummy_type(
        "_xpu_isCurrentStreamCapturing"
    )

from torch._C import (
    _xpu_graph_pool_handle,
    _xpu_isCurrentStreamCapturing,
    _XPUGraph,
)


def is_current_stream_capturing() -> bool:
    """Return True if XPU graph capture is underway on the current XPU stream, False otherwise.

    If an XPU context does not exist on the current device, returns False without initializing the context.
    """
    return _xpu_isCurrentStreamCapturing()


def graph_pool_handle() -> _POOL_HANDLE:
    """Return an opaque token representing the id of a graph memory pool."""
    return _xpu_graph_pool_handle()
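# Illustrative sketch only, not part of this module: two captures sharing one
# memory pool obtained from ``graph_pool_handle()``. Assumes an XPU device and
# a pre-allocated device tensor ``x`` (hypothetical); ``graph`` is the context
# manager defined later in this file, and replays should be issued in capture
# order so the shared pool stays consistent.
#
#     pool = torch.xpu.graph_pool_handle()
#     g1, g2 = torch.xpu.XPUGraph(), torch.xpu.XPUGraph()
#     with torch.xpu.graph(g1, pool=pool):
#         y = x * 2
#     with torch.xpu.graph(g2, pool=pool):
#         z = y + 1
#     g1.replay()
#     g2.replay()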
d) fddZd+ fddZd, fd!d"Zd, fd#d$Z  ZS )-r   a  Wrapper around a XPU graph.

    Arguments:
        keep_graph (bool, optional): If ``keep_graph=False``, the
            executable command graph will be instantiated on GPU at the end of
            ``capture_end`` and the underlying modifiable command graph will be
            destroyed. Note that the executable command graph will not be
            instantiated at the end of ``capture_end`` in this
            case. Instead, it will be instantiated via an explicit called
            to ``instantiate`` or automatically on the first call to
            ``replay`` if ``instantiate`` was not already called. Calling
            ``instantiate`` manually before ``replay`` is recommended to
            prevent increased latency on the first call to ``replay``.

    F
keep_graphr   r   r
   c                   s   t  | |S N)super__new__)clsr"   	__class__r   r   r%   G   s   zXPUGraph.__new__NpoolOptional[_POOL_HANDLE]Nonec                   s   t  j|d dS )a  Begin capturing XPU work on the current xpu stream.

        Typically, you shouldn't call ``capture_begin`` yourself.
        Use :class:`~torch.xpu.graph`, which call ``capture_begin`` internally.

        Arguments:
            pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
                :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
                with the indicated pool.
        r)   N)r$   capture_begin)selfr)   r'   r   r   r-   J   s   zXPUGraph.capture_beginc                      t    dS )a  End XPU graph capture on the current stream.

        After ``capture_end``, ``replay`` may be called on this instance.

        Typically, you shouldn't call ``capture_end`` yourself.
        Use :class:`~torch.xpu.graph`, which call ``capture_end`` internally.
        N)r$   capture_endr.   r'   r   r   r0   W   s   zXPUGraph.capture_endc                   r/   )a/  Instantiate the XPU graph. Will be called by
        ``capture_end`` if ``keep_graph=False``, or by ``replay`` if
        ``keep_graph=True`` and ``instantiate`` has not already been
        explicitly called. Does not destroy the xpu modify command graph returned
        by ``raw_xpu_graph``.
        N)r$   instantiater1   r'   r   r   r2   a   s   zXPUGraph.instantiatec                   r/   )z+Replay the XPU work captured by this graph.N)r$   replayr1   r'   r   r   r3   j   r!   zXPUGraph.replayc                   r/   )z1Delete the graph currently held by this instance.N)r$   resetr1   r'   r   r   r4   n   r!   zXPUGraph.resetr   c                   
   t   S )zReturn an opaque token representing the id of this graph's memory pool.

        This id can optionally be passed to another graph's ``capture_begin``,
        which hints the other graph may share the same memory pool.
        )r$   r)   r1   r'   r   r   r)   r   s   
zXPUGraph.poolc                   r5   )z.Enable debugging mode for XPUGraph.debug_dump.)r$   enable_debug_moder1   r'   r   r   r6   z   s   
zXPUGraph.enable_debug_mode
debug_pathstrc                   s   t  |S )z
        Arguments:
            debug_path (required): Path to dump the graph to.

        Calls a debugging function to dump the graph if the debugging is
        enabled via XPUGraph.enable_debug_mode()
        )r$   
debug_dump)r.   r7   r'   r   r   r9   ~   s   zXPUGraph.debug_dumpintc                   r5   )zReturns the underlying xpuGraph_t. ``keep_graph`` must be True.

        XPU doesn't provide APIs to manipulate this object.
        )r$   raw_xpu_graphr1   r'   r   r   r;         
zXPUGraph.raw_xpu_graphc                   r5   )a  Returns the underlying xpuGraphExec_t. ``instantiate`` must have been called if ``keep_graph`` is True, or ``capture_end`` must have been called if ``keep_graph`` is False. If you call ``instantiate()`` after ``raw_xpu_graph_exec()``, the previously returned xpuGraphExec_t will be destroyed. It is your responsibility not to use this object after destruction.

        XPU doesn't provide APIs to manipulate this object.
        )r$   raw_xpu_graph_execr1   r'   r   r   r=      r<   zXPUGraph.raw_xpu_graph_exec)F)r"   r   r   r
   r#   )r)   r*   r   r+   r   r+   r   r   )r7   r8   r   r+   )r   r:   )__name__
__module____qualname____doc__r%   r-   r0   r2   r3   r4   r)   r6   r9   r;   r=   __classcell__r   r   r'   r   r   6   s    
	
r   c                   @  sB   e Zd ZU dZdZded< 		ddd
dZdddZdddZdS )r   a  Context-manager that captures XPU work into a :class:`torch.xpu.XPUGraph` object for later replay.

    Arguments:
        xpu_graph (torch.xpu.XPUGraph): Graph object used for capture.
        pool (optional): Opaque token (returned by a call to :func:`~torch.xpu.graph_pool_handle()` or
            :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) hinting this graph's capture
            may share memory from the specified pool.
        stream (torch.xpu.Stream, optional): If supplied, will be set as the current stream in the context.
            If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.

    .. note::
        For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
        used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

    NOptional[torch.xpu.Stream]default_capture_stream	xpu_graphr   r)   r*   streamc                 C  sh   | j jd u rtj | j _|d u rdn|f| _|d ur|n| j j| _| jd u r+td| j| _|| _	d S )Nr   zcapture_stream must not be None)
r(   rF   r   r    Streamr)   capture_streamAssertionError
stream_ctxrG   )r.   rG   r)   rH   r   r   r   __init__   s   	

zgraph.__init__r   r+   c                 C  s0   t j  t j  | j  | jj| j  d S r#   )	r   r    synchronizeempty_cacherL   	__enter__rG   r-   r)   r1   r   r   r   rP      s   


zgraph.__enter__argsobjectc                 G  s   | j   | jj|  d S r#   )rG   r0   rL   __exit__)r.   rQ   r   r   r   rS      s   
zgraph.__exit__)NN)rG   r   r)   r*   rH   rE   r>   )rQ   rR   r   r+   )	r@   rA   rB   rC   rF   __annotations__rM   rP   rS   r   r   r   r   r      s   
 
	r   torch.nn.Module.r   _ModuleOrCallable   F	callablessample_argstuple[Tensor, ...]num_warmup_itersr:   allow_unused_inputr)   r*   c                 C     d S r#   r   rX   rY   r[   r\   r)   r   r   r   r         r   tuple[_ModuleOrCallable, ...]tuple[tuple[Tensor, ...], ...]c                 C  r]   r#   r   r^   r   r   r   r      r_   7Union[_ModuleOrCallable, tuple[_ModuleOrCallable, ...]]9Union[tuple[Tensor, ...], tuple[tuple[Tensor, ...], ...]]c           )        s  t  rt  rtdd}t| ts$d}| f} tttdf |f}nttttdf df |}g  t	| |D ]N\}}t|t j
jrlt|jdkrYt|jdkrYt|jdks]tdtdd | D sltd	t jjj| }	 t|	 td
d |	D stdq9dd  D }
dd | D  fddtt| D }dd tt| D }dd tt| D }|du rt n|}t j  t jt j \ t	| ||D ]M\}}}d\}}}t|D ]4}t jj|| }tdd |D }t|dkrt jj|tdd |D tdd |D d|d}q|||fD ]}~q qW d   n	1 s1w   Y  t j  g }g }t	| ||D ]8\}}}t jj ||d || }W d   n	1 sbw   Y  t jj!|\}}|t| || qEg }g }t	t"|t"|t"|D ]\}}}tdd |D } tdd |D }d}t|dkrt jj ||d! t jj|tdd |D tdd | D d|d}W d   n	1 sw   Y  g }!d}"|D ]}#|#j#r|dur|!||"  |"d7 }"q|!d qt|!}!||  ||! q|$  |$  d:d/d0}$g }%t%| D ]F\}&}|$||& ||& |& |
|& ||& ||& ||& ||& ||& 	}'t|t j
jrhd;d8d9}(|(||j&|'|j'|_'|%| q(|%|' q(|rv|%d S t|%S )<a  Accept callables (functions or :class:`nn.Module<torch.nn.Module>`\ s) and returns graphed versions.

    Each graphed callable's forward pass runs its source callable's
    forward XPU work as a XPU graph inside a single autograd node.

    The graphed callable's forward pass also appends
    a backward node to the autograd graph. During backward, this node runs the
    callable's backward work as a XPU graph.

    Therefore, each graphed callable should be a drop-in replacement for its source callable
    in an autograd-enabled training loop.

    See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

    If you pass a tuple of several callables, their captures will use the same memory pool.

    Arguments:
        callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
            If you pass a tuple of callables, their order in the tuple must be the same order they'll run
            in the live workload.
        sample_args (tuple of Tensors, or tuple of tuples of Tensors): Samples args for each callable.
            If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
            If a tuple of callables was passed, ``sample_args`` must be tuple of tuples of argument Tensors.
        num_warmup_iters (int): The number of warmup iterations. Currently, ``DataDistributedParallel`` needs
            11 iterations for warm up. Default: ``3``.
        allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
            (and therefore their grad is always zero) is an error. Defaults to False.
        pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
            :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
            with the indicated pool.
    .. note::
        The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
        that's expected for the corresponding real input in the training loop.

    .. warning::
        This API is in beta and may change in future releases.

    .. warning::
        ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

    .. warning::
        Returned callables do not support higher order differentiation (e.g., double backward).

    .. warning::
        In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
        may be trainable. Buffers must have ``requires_grad=False``.

    .. warning::
        After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
        you may not add or remove any of that Module's parameters or buffers.

    .. warning::
        :class:`torch.nn.Module`\s passed to :func:`~torch.xpu.make_graphed_callables` must not have module hooks
        registered on them at the time they are passed. However, registering hooks on modules *after* passing them
        through :func:`~torch.xpu.make_graphed_callables` is allowed.

    .. warning::
        When running a graphed callable, you must pass its arguments in the same order and format
        they appeared in that callable's ``sample_args``.

    .. warning::
        The automatic mixed precision is supported in :func:`~torch.xpu.make_graphed_callables` only with disabled
        caching. The context manager `torch.amp.autocast()` must have `cache_enabled=False`.
    z_make_graphed_callables does not support the autocast caching. Please set `cache_enabled=False`.FT.r   zModules must not have hooks registered at the time they are passed. However, registering hooks on modules after passing them through make_graphed_callables is allowed.c                 s  s    | ]}|j d u V  qdS )FNrequires_grad.0br   r   r   	<genexpr>I      z)make_graphed_callables.<locals>.<genexpr>zIn any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters may be trainable. All buffers must have ``requires_grad=False``.c                 s  s    | ]	}t |tjV  qd S r#   )
isinstancer   r   )rg   argr   r   r   ri   Q      zfIn the beta API, sample_args for each callable must contain only Tensors. Other types are not allowed.c                 S  s   g | ]}t |qS r   )len)rg   rQ   r   r   r   
<listcomp>Y  s    z*make_graphed_callables.<locals>.<listcomp>c                 S  s*   g | ]}t |tjjrt| nd qS )r   )rk   r   nnModuletuple
parameters)rg   cr   r   r   ro   Z  s    c                   s   g | ]
} | |  qS r   r   rg   iflatten_sample_argsper_callable_module_paramsr   r   ro   ^  s    c                 S     g | ]}t j qS r   r   r    r   rg   _r   r   r   ro   c      c                 S  rz   r   r{   r|   r   r   r   ro   d  r~   N)NNNc                 s      | ]}|j r|V  qd S r#   rd   rg   or   r   r   ri   q  rj   c                 s  r   r#   rd   ru   r   r   r   ri   u  s    
c                 s  s     | ]}|j rt|V  qd S r#   re   r   
empty_liker   r   r   r   ri   x  s    
)outputsinputsgrad_outputsonly_inputsallow_unusedr,   c                 s  s$    | ]}|j rt|nd V  qd S r#   r   r   r   r   r   ri     s    
c                 s  r   r#   rd   r   r   r   r   ri     rj   c                 s  r   r#   rd   ru   r   r   r   ri     rj   c                 s  s    | ]	}|d ur|V  qd S r#   r   r   r   r   r   ri     rm      	fwd_graphr   	bwd_graphmodule_paramstuple[torch.nn.Parameter, ...]len_user_argsr:   output_unflatten_spectorch.utils._pytree.TreeSpecstatic_input_surfacerZ   static_outputsstatic_grad_outputstuple[Optional[Tensor], ...]static_grad_inputsr   Callable[..., object]c	           
        s:   G 	fdddt jj d fdd}	|	S )	Nc                      sD   e Zd ZedfddZeejjjd fd
dZ	dS )zOmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.GraphedctxrR   r   r   r   rZ   c                   sd   t D ]}|  ||  kr| ||  q   tts)tdtdd D S )Nzstatic_outputs must be a tuplec                 s  s    | ]}|  V  qd S r#   detachr   r   r   r   ri     s    zjmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward.<locals>.<genexpr>)rangedata_ptrcopy_r3   rk   rr   RuntimeError)r   r   rv   )r   r   r   r   r   r   forward  s   
zWmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forwardgradsc                   s   t |t krtdt  dt | t|D ]\}}|d ur0| | kr0|| q   tts>tdtdd D S )Nz	Expected z gradients but got z"static_grad_inputs must be a tuplec                 s  s$    | ]}|d ur|  n|V  qd S r#   r   rf   r   r   r   ri     s
    
zkmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward.<locals>.<genexpr>)rn   r   zipr   r   r3   rk   rr   )r   r   ggrad)r   r   r   r   r   backward  s   

zXmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backwardN)r   rR   r   r   r   rZ   )r   rR   r   r   r   rZ   )
r@   rA   rB   staticmethodr   r   autogradfunctiononce_differentiabler   r   )r   r   r   r   r   r   r   r   r   Graphed  s    
r   	user_argsrR   r   c                    s0   t jjj|  } jt|  }t jj|S r#   )r   utils_pytreearg_tree_leavesapplyrr   tree_unflatten)r   flatten_user_argsout)r   r   r   r   r   functionalized  s   zVmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.functionalized)r   rR   r   rR   )r   r   Function)
r   r   r   r   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r   r   make_graphed_autograd_function  s   $#z>make_graphed_callables.<locals>.make_graphed_autograd_functionfuncrU   graph_training_stater   graphedCallable[_P, _R]orig_fwdc                   s   d	 fdd}|S )
Nr   _P.argsuser_kwargs	_P.kwargsr   r   c                    s&    j kr| i |S | i |S r#   )training)r   r   r   r   r   r   r   r   new_fwd  s   
zEmake_graphed_callables.<locals>.make_graphed_forward.<locals>.new_fwd)r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   make_graphed_forward  s   z4make_graphed_callables.<locals>.make_graphed_forward)r   r   r   r   r   r   r   r:   r   r   r   rZ   r   rZ   r   r   r   rZ   r   r   )
r   rU   r   r   r   r   r   r   r   r   )(r   is_autocast_enabledis_autocast_cache_enabledr   rk   rr   typingcastr   r   rp   rq   rn   _backward_hooks_forward_hooks_forward_pre_hooksallbuffersr   r   r   append	TypeErrorr   r   r    rN   rH   rI   tree_leavesr   r   r   tree_flattenreversedre   reverse	enumerater   r   ))rX   rY   r[   r\   r)   just_one_callable_sample_argsrt   rQ   flatten_argper_callable_len_user_args"per_callable_static_input_surfaces
fwd_graphs
bwd_graphsmempoolr   r   grad_inputsr   outputs_gradr}   vper_callable_static_outputs"per_callable_output_unflatten_specr   func_outputsflatten_outputsspec per_callable_static_grad_outputsper_callable_static_grad_inputsr   r   r   r   grad_idxrl   r   retrv   r   r   r   rw   r   r      s  G





	

6
)r   r   r?   )rW   FN)rX   rV   rY   rZ   r[   r:   r\   r   r)   r*   r   rV   )rX   r`   rY   ra   r[   r:   r\   r   r)   r*   r   r`   )rX   rb   rY   rc   r[   r:   r\   r   r)   r*   r   rb   )&
__future__r   r   collections.abcr   r   r   r   r   r   typing_extensionsr	   r
   r   r   r   	torch.xpur   _utilsr   __all__r   r   hasattr_C__dict__torch._Cr   r   r   r   r   r   r   rR   rV   rT   r   r   r   r   r   <module>   sN    

a8	