o
    kiM                  #   @   s,  U d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	Z	ddl
mZ edZedZi Zee	jjef ed	< eh d
Zdededeeef deeeef geeef f fddZ	dCddddde	jde	jde	jdee	j dededede	jfddZedde	dCddddde	jde	jde	jdee	j dededede	jfddZd ee d!edefd"d#Zd$e	jd%edede	jfd&d'Zd(e	jd)e	jd*ed+ed ee d,ede	jfd-d.Zd*ed+eddfd/d0Z d(e	jd)e	jd*ed+ed ee de	jfd1d2Z!			dDdddddd3dd4d(e	jd)e	jd5e	jd6ee	j d7ee	j d8ee	j d9ed:ed;ed,ed ee d<ed=ee de"e	je	je	je	jf fd>d?Z#ed@de#			dDdddddd3dd4d(e	jd)e	jd5e	jd6ee	j d7ee	j d8ee	j d9ed:ed;ed,ed ee d<ed=ee de"e	je	je	je	jf fdAdBZ$dS )EzImplementations of ONNX operators as native Torch ops.

NOTE: Fake implementations:
    Refer to https://docs.pytorch.org/docs/stable/library.html#torch.library.register_fake
    for more details on how to create fake kernels.
    N)Callable)OptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_version	fake_implreturnc                    s.   dt ttf dt ttf f fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    sP   d }t jjd d| dd| }| tttt jj|< |  |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr	   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   r   ^/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/onnx/ops/_impl.py	decorator'   s   

z_onnx_op.<locals>.decorator)r   r   r   )r   r   r   r"   r   r    r!   _onnx_op"   s   *r#   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr$   r%   r&   c                C   s   |   S )zFFake implementation for RotaryEmbedding-23 for torch.compile purposes.)clone)r'   r(   r)   r*   r$   r%   r&   r   r   r!   _rotary_embedding_23_fake_impl5   s   r,   RotaryEmbedding   c                   s  | j t}d  d durWt dkfdd tj d  k fdd tj d kfd	d t dkoN dkfd
d nt dkod dkfdd |dkrwt| d} n$|dkrt|dkfdd d }|| }	 ||	g}
t| |
} tt| j dkdd  | j d }	|dkr|	}| ddddddd|f }| dddddd|df }|d dur  ntj d  koj d k fdd tj d  koj d k fdd tj d kfdd tj d kfdd tdtd|rk|dddddddddf }|dddddddddf }n
tj|ddd\}}| |  }| |  }|rt|d}t|d}tj	||fdd}t||j }n	tj	||fdd}tj	||fdd}|dkrt|S t|dS )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      s   d j  S )Nz6position_ids must be 2D when provided. Received shape shaper   )r*   r   r!   <lambda>Z   s    z%rotary_embedding_23.<locals>.<lambda>c                      s   d  dj d  S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r1   r   )
batch_sizer*   r   r!   r3   ^       r
   c                      s   d d j d  S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r4   r
   r1   r   )r*   sequence_lengthr   r!   r3   b   r6   c                         d j  dj  S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r1   r   r(   r)   r   r!   r3   f   
       c                      r8   )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r9   r1   r   r:   r   r!   r3   l   r;      )r   r0   r
   r<   c                      s
   d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   r   )input_shaper   r!   r3   y   s   
 c                   S      dS )Nzx should be a 4D tensor by nowr   r   r   r   r!   r3          c                      s   dj  d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r1   r   )r5   cosr7   r   r!   r3          c                      s   dj  d  d dS )Nzsin has shape rA   rB   rC   r1   r   )r5   r7   sinr   r!   r3      rE   c                      s   d j d  d dS )NzLast dimension of cos cache (rG   ') should match rotary_embedding_dim/2 ().r1   r   )rD   rotary_embedding_dim_halfr   r!   r3          c                      s   dj d  d  dS )NzLast dimension of sin cache (rG   rH   rI   r1   r   )rJ   rF   r   r!   r3      rK   dim)
r2   lenr   _checkrM   permutereshape	unsqueezechunkcat)r'   r(   r)   r*   r$   r%   r&   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr   )	r5   rD   r(   r>   r*   rJ   r7   rF   r)   r!   rotary_embedding_23C   s   



  "$
ra   scalerW   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)rb   rW   r   r   r!   _get_scale_factor   s   re   tensorr5   c                 C   s:   | j d | j d }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r
   r0   )r2   view	transpose
contiguous)rf   r5   r%   r7   rV   rW   r   r   r!   _reshape_3d_to_4d   s   rj   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec              	   C   s2   |dkrt | ||||S tt| |ddS )z1Get QK output tensor based on the specified mode.r   r/   rG   )_compute_qk_output_for_mode_0r   
zeros_likematmulrh   )rk   rl   rm   rn   rb   ro   r   r   r!   _get_qk_output_for_aten_spda   s
   	
rs   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rn   rm   r   r!   r3      s    z-_validate_gqa_configuration.<locals>.<lambda>N)r   rO   )rm   rn   r   rt   r!   _validate_gqa_configuration   s   
ru   c                 C   s`   |}||kr|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r
   rL   r<   r/   rG   )repeat_interleavere   r2   rc   rd   r   rr   rh   )rk   rl   rm   rn   rb   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r!   rp      s   	
rp           )	is_causalkv_num_headsq_num_headsro   rb   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer~   r   r   r   r   c                C   sZ  | j d }t| j dkrE| j d }| j }|dur,|||j d |j d  |j d | f}n|||j d |j d | f}|}||||d f}n:| j d }| j }|duri|j d |j d |j d |j d  |j d f}n|j }|}| j d | j d | j d |d f}tj|| j| jd}tj||j|jd}tj||j|jd}tj|| j| jd}||||fS )z@Fake implementation for Attention-23 for torch.compile purposes.r   r<   r
   Nr0   dtypedevice)r2   rN   r   emptyr   r   )rk   rl   r   r   r   r   r~   r   r   ro   rb   r   r   r5   q_sequence_lengthoutput_shapepresent_key_shapepresent_value_shapeqk_output_shaper`   present_keypresent_value	qk_outputr   r   r!   _attention_23_fake_impl  sR   


r   	Attentionc          '      C   s  d\}}}t | j}| jd }t | jdkr;t|dko|dkdd  | jd }t| ||} t|||}t|||}tt | jdkoQt |jdkoQt |jdkdd  | j| }t|
|}
|d	urmtj||g|d
n| }|d	ur~tj||g|d
n| }||}}| j| }|j| }| j| }|j| }|dko|	dko|d	u o|d	u p|jtj	k}t
|| |rtjjj| |||d||
t	||kd}t| ||||
|	}n||kr|| }|j||d
}|j||d
}tj||| j| jd}|rt|d	u dd  ttj||tj	| jd}|| td}|d	ur4|jtj	kr0|| td}n|| }t|
| jd }t|} | |  }!||  }"t|!|"dd}#|#}|#| }$|	dkr`|$}|dkrn|t|$|  }$|	dkru|$}|d	ur|tv r|$j}%|$tj| }$tj|$dd
}&|&|%}&ntj|$dd
}&ntj|$dd
}&|	dkr|&}t|&|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r
   r0   r<   r   r<   c                   S   r?   )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r!   r3     r@   zattention_23.<locals>.<lambda>r
   r=   c                   S   r?   )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r!   r3     r@   NrL   r}   )r   	dropout_pr~   rb   
enable_gqar   c                   S   r?   )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r!   r3     r@   z-infr/   rG   r0   )"rN   r2   r   rO   rj   re   rT   r+   r   boolru   nn
functionalscaled_dot_product_attentionrs   rv   zerosr   trilonesmasked_fillfloatrc   rd   rr   rh   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxri   rg   )'rk   rl   r   r   r   r   r~   r   r   ro   rb   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr5   r   q_head_sizer   r   rm   rn   kv_sequence_lengthcan_use_sdpar`   r   rx   	attn_biascausal_maskry   rz   r{   r|   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxr   r   r!   attention_23c  s   



(
















r   )N)NNN)%__doc__rc   collections.abcr   typingr   r   typing_extensionsr   r   torch.onnx.opsr   r   r   r	   dict_ops
OpOverload__annotations__	frozensetr   strintr#   Tensorr   r,   ra   r   re   rj   rs   ru   rp   tupler   r   r   r   r   r!   <module>   s   


	

	 




	


T	
