o
    ei*                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlmZ dddZd	d
 ZdddZdd Ze jded fddZe jded fddZe jded fddZdS )    N)	Generator)global_decomposition_table)_rnn_helpergather_paramsgru_cell	lstm_cell)
while_loopFc                    s>  |d }|d |r|d nd}|r|d nd t |dkr"|d nt |dkr,|d nd|d d}|d d}tjj| |||rLdntjdgt	|j
dd R |j|jd}	fd	d
}
 fdd}tjdtjd}t|
|||	||g\}}}}|r|d}||d|dffS )ay  
    1 layer fn for while loop LSTM

    Args:
        inp: Input tensor of shape (seq_len, batch, input_size)
        hidden: Tuple of (hx, cx) hidden states
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        reverse: Whether to process sequence in reverse

    Returns:
        Tuple of (output, (final_hx, final_cx))
    r         N         dtypedevicec                       |   dk S Nr   size)iouthxcxprecomputed_input _/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/export/_patches.pycond_fn.      z*one_layer_while_loop_lstm.<locals>.cond_fnc              	      sn   |   }t| tj|dd d t| || dd\}}| }|d||< | d |||fS )Nr   r	   maxr
   )	chunk_dim)itemtorch_check_is_sizer   r   clonesqueeze)idxr   r   r   r   hh_bias	hh_weight	hr_weightr   r   r   body_fn1   s   

z*one_layer_while_loop_lstm.<locals>.body_fnr   )len	unsqueezer#   nn
functionallinearflipemptyr   tupleshaper   r   tensorint64r   r&   )inphiddenparams
has_biasesreverse	ih_weightih_biasr   r   step_outputr   r,   cnt_r   final_hxfinal_cxr   r(   r   one_layer_while_loop_lstm
   s6   *
rE   c	                 C   s   t |dkr
tdt|||d d|d dk}tt|d |d }	t}
t| |	||||||||

\}}tt| }|t	|d dt	|d dfS )a  
    LSTM implementation using while_loop for export compatibility.

    This is a drop-in replacement for the default LSTM decomposition that uses
    while_loop instead of Python loops, making it more suitable for torch.export.

    Args:
        input: Input tensor
        hx: Tuple of (h0, c0) hidden states
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        num_layers: Number of LSTM layers
        dropout: Dropout probability
        train: Training mode
        bidirectional: Whether to use bidirectional LSTM
        batch_first: Whether batch dimension is first

    Returns:
        Tuple of (output, h_n, c_n)
    r
   zlstm expects two hidden statesr   r	   )
r.   AssertionErrorr   r   listziprE   r   r#   stackinputr   r;   r<   
num_layersdropouttrainbidirectionalbatch_firstr:   layer_fnr   final_hiddensr   r   r   lstm_while_loop_implI   s&   $"rS   c                    s   |d |d |r|d nd|r|d nd t jj| |r(dn|d}t jdgt|j	dd R |j
|jd}fdd} fd	d
}t jdt jd}	t|||	||g\}
}}|rs|d}||dfS )ad  
    1 layer fn for while loop GRU

    Args:
        inp: Input tensor of shape (seq_len, batch, input_size)
        hidden: Hidden state tensor
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        reverse: Whether to process sequence in reverse

    Returns:
        Tuple of (output, final_hidden)
    r   r	   r
   Nr   r   c                    r   r   r   )r   r   
cur_hiddenr   r   r   r      r   z)one_layer_while_loop_gru.<locals>.cond_fnc                    sd   |   }t| tj|dd d t| | }| }|d||< | d ||fS )Nr   r	   r   )r"   r#   r$   r   r   r%   r&   )r'   r   rT   r   r)   r*   r?   r>   r   r   r   r,      s   
z)one_layer_while_loop_gru.<locals>.body_fnr-   )r#   r0   r1   r2   r3   r/   r4   r   r5   r6   r   r   r7   r8   r   r&   )r9   r:   r;   r<   r=   rT   r@   r   r,   rA   rB   r   final_hiddenr   rU   r   one_layer_while_loop_gru}   s,   

rW   c	                 C   sL   t ||d}t|d}	t}
t| |	||||||||

\}}|t|dfS )a  
    GRU implementation using while_loop for export compatibility.

    This is a drop-in replacement for the default GRU decomposition that uses
    while_loop instead of Python loops, making it more suitable for torch.export.

    Args:
        input: Input tensor
        hx: Hidden state tensor
        params: List of weight and bias tensors
        has_biases: Whether biases are included
        num_layers: Number of GRU layers
        dropout: Dropout probability
        train: Training mode
        bidirectional: Whether to use bidirectional GRU
        batch_first: Whether batch dimension is first

    Returns:
        Tuple of (output, h_n)
    Fr   )r   rG   unbindrW   r   r#   rI   rJ   r   r   r   gru_while_loop_impl   s    rY   return)NNNc              
   c   s    t d }|| d}| jtjjjd}z9||| < || jtjjj< dV  W |dur/||| < n|| d |durC|| jtjjj< dS | jtjjjd dS |durX||| < n|| d |durk|| jtjjj< w | jtjjjd w )a  
    Generic context manager for registering while_loop-based RNN decompositions.

    Args:
        rnn_op: The aten operation to patch (e.g., torch.ops.aten.lstm.input)
        rnn_impl: The while_loop-based implementation function

    Note:
        This is an internal helper. Use register_lstm_while_loop_decomposition()
        or register_gru_while_loop_decomposition() instead.
    post_autogradN)r   get
py_kernelsr#   _CDispatchKeyCompositeImplicitAutogradpop)rnn_oprnn_implregistryoriginal_decomporiginal_py_kernelr   r   r   &_register_rnn_while_loop_decomposition   s0   


rg   c                   c   @    t tjjjjt dV  W d   dS 1 sw   Y  dS )a  
    Context manager that temporarily registers the while_loop-based LSTM decomposition.

    The while_loop-based decomposition is more suitable for export and graph-based
    execution, as it avoids Python control flow that cannot be captured in the graph.
    This should support dynamic sequence lengths, however as while_loop does not
    support Autograd yet, an ExportedProgram created with this will not be trainable.

    Usage::

        from torch.export._patches import register_lstm_while_loop_decomposition
        from torch.export import export

        with register_lstm_while_loop_decomposition():
            # Export your model with LSTM
            ep = export(model, (x, h0, c0))

    Note:
        This context manager temporarily modifies the global decomposition table
        and py_kernels registration. The original registrations are restored when
        exiting the context.
    N)rg   r#   opsatenlstmrK   rS   r   r   r   r   &register_lstm_while_loop_decomposition     "rl   c                   c   rh   )a  
    Context manager that temporarily registers the while_loop-based GRU decomposition.

    The while_loop-based decomposition is more suitable for export and graph-based
    execution, as it avoids Python control flow that cannot be captured in the graph.
    This should support dynamic sequence lengths, however as while_loop does not
    support Autograd yet, an ExportedProgram created with this will not be trainable.

    Usage::

        from torch.export._patches import register_gru_while_loop_decomposition
        from torch.export import export

        with register_gru_while_loop_decomposition():
            # Export your model with GRU
            ep = export(model, (x, h0))

    Note:
        This context manager temporarily modifies the global decomposition table
        and py_kernels registration. The original registrations are restored when
        exiting the context.
    N)rg   r#   ri   rj   grurK   rY   r   r   r   r   %register_gru_while_loop_decomposition2  rm   ro   )F)
contextlibcollections.abcr   r#   torch._decompr   torch._decomp.decompositionsr   r   r   r   "torch._higher_order_ops.while_loopr   rE   rS   rW   rY   contextmanagerrg   rl   ro   r   r   r   r   <module>   s$    
?
461/