o
    @i`"                     @   sh  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd  Z)d!d" Z*d#d$ Z+d%d& Z,d'd( Z-d)d* Z.d+d, Z/d-d. Z0d/d0 Z1d1d2 Z2d3d4 Z3dTd5ee fd6d7Z4dUd8ej5d9eeej6f fd:d;Z7d8e8d9ej9fd<d=Z:d9e8fd>d?Z;d@dA Z<dVdCdDZ=dEdF Z>dVdGdHZ?ej@jAe=  e? dIZBdJeCdKeCfdLdMZDdNeej6ejEjFjf d9ej6fdOdPZGdUdQeee8  fdRdSZHdS )W    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                   C   s   t jdddkS )NTRITON_INTERPRET01)osenvironget r    r    b/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r"   c                   C   s   t  rd S tjjj S N)r"   tritonruntimedriveractiveget_current_targetr    r    r    r!   r)      s   r)   c                  C      t  } | d u r	dS | jdkS )NFcudar)   backendtargetr    r    r!   is_cuda$      r0   c                   C      t  otj d dkS )Nr      r0   torchr+   get_device_capabilityr    r    r    r!   is_ampere_or_newer)      r7   c                   C      t  otj d dkS )Nr   
   r4   r    r    r    r!   is_blackwell-   r8   r;   c                   C   r2   Nr   	   r4   r    r    r    r!   is_hopper_or_newer1   r8   r>   c                   C   r9   r<   r4   r    r    r    r!   	is_hopper5   r8   r?   c                   C   r9   )Nr      r4   r    r    r    r!   is_sm12x9   r8   rA   c                  C   r*   )NFhipr,   r.   r    r    r!   is_hip=   r1   rC   c                  C   "   t  } | d uo| jdko| jdkS )NrB   gfx90ar)   r-   archr.   r    r    r!   is_hip_cdna2B      rH   c                  C   rD   )NrB   gfx942rF   r.   r    r    r!   is_hip_cdna3G   rI   rK   c                  C   rD   )NrB   gfx950rF   r.   r    r    r!   is_hip_cdna4L   rI   rM   c                  C   "   t  } | d uo| jdkod| jv S )NrB   gfx11rF   r.   r    r    r!   is_hip_gfx11Q   rI   rP   c                  C   rN   )NrB   gfx12rF   r.   r    r    r!   is_hip_gfx12V   rI   rR   c                  C   rN   )NrB   gfx1250rF   r.   r    r    r!   is_hip_gfx1250[   rI   rT   c                   C   s   t  pt pt S r$   )rH   rK   rM   r    r    r    r!   is_hip_cdna`   r#   rU   c                   C   s   t  rdS dS )Ni  i   )rM   r    r    r    r!   get_hip_lds_sized   s   rV   c                  C   r*   )NFxpur,   r.   r    r    r!   is_xpuh   r1   rX   c                  C   s   t  } | d u r	dS t| jS )N )r)   strrG   r.   r    r    r!   get_archm   r1   r[   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strr\   lowhighrk   r_   xr    r    r!   numpy_randomr   s,   


*rz   ry   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicera   r   r   )r_   nameri   lstriprr   rl   rj   r	   r5   tensortlr   )ry   r}   dst_typetsigned_type_namex_signedr    r    r!   	to_triton   s   
r   c                 C   s   t t|  d S r$   )r   	str_to_tyr
   ry   r    r    r!   str_to_triton_dtype   s   r   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$r`   znot a triton or torch dtype: )rf   r%   languager_   r~   r5   rematchrZ   group	TypeErrortype)r_   mr    r    r!   torch_dtype_name   s   
r   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rf   r   basecpunumpyrr   rl   rj   r   r_   r5   Tensorr   float
ValueErrorr   r    r    r!   to_numpy   s   
 r   Fc                 C   sl   t  rdS t s
dS tjjj}| rdnd}ttt|	d}t
|dks)J |tj d dko5||kS )	NTF)r@   r   )r@      .   r   r=   )r"   r0   r   nvidiaptxasversiontuplemaprg   splitlenr5   r+   r6   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tupler    r    r!   supports_tma   s   
r   c                   C   s&   t  rdS t s
dS tj d dkS )NTFr   r=   )r"   r0   r5   r+   r6   r    r    r    r!   supports_ws   s
   r   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r    )r   r    r    r!   tma_skip_msg   s   r   )reasonsizealignc                 C   s   t j| t jddS )Nr+   )r_   r}   )r5   emptyr   )r   r   _r    r    r!   default_alloc_fn   r#   r   r   c                 C   s   t | tjjjr| jS | S r$   )rf   r%   r&   jitr   r   )r   r    r    r!   unwrap_tensor   s   r   skipped_attrc                    st   ddl m d u rt t fddj D g  j fdd} fdd}||fS )	Nr   r   c                    s4   i | ]\}}t | jr| jkr|vr||qS r    )rf   
base_knobs).0r~   knobset)r   r   r    r!   
<dictcomp>   s    
z%_fresh_knobs_impl.<locals>.<dictcomp>c                     sj     D ]+\} }t| |   |j D ]}|jtjv r(j	|jdd q 
|j qqd_S )NF)raisingT)itemssetattrcopyresetknob_descriptorsvalueskeyr   r   delenvappendpropagate_env)r~   r   knob)env_to_unsetr   	knobs_mapmonkeypatchr    r!   fresh_function   s   z)_fresh_knobs_impl.<locals>.fresh_functionc                     sL     D ]
\} }t| | q   D ]}|tjv r tj|= q_d S r$   )r   r   undor   r   r   )r~   r   k)r   r   r   r   prev_propagate_envr    r!   reset_function  s   

z)_fresh_knobs_impl.<locals>.reset_function)r%   r   setpytestMonkeyPatch__dict__r   r   )r   r   r   r    )r   r   r   r   r   r   r!   _fresh_knobs_impl   s   	r   )NNNr$   )F)Ir   r   r   rj   r5   r%   triton.languager   r   r   typingr   r   r   r   numpy.randomr   triton.runtime.jitr   r	   r
   rh   ri   integral_dtypesrp   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedr   
tma_dtypesr"   r)   r0   r7   r;   r>   r?   rA   rC   rH   rK   rM   rP   rR   rT   rU   rV   rX   r[   rz   ndarrayr   r   rZ   r_   r   r   r   r   r   r   markskipifrequires_tmarg   r   r&   r   r   r   r    r    r    r!   <module>   sh    

 

$