o
    Di1                     @  s  d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlZd dlZd dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ dd	lmZ d d
lmZ ddlmZ  ddlm!Z" e
dZ#eG dd dZ$G dd dZ%G dd dZ&eddG dd dZ'dd Z(dd Z)dd Z*dd  Z+d!d" Z,d#d$ Z-ej.e,ej/gd%Z0ej.e,ej1gd%Z2ej.e-ej3gd%Z4G d&d' d'Z5G d(d) d)Z6e7 Z8G d*d+ d+Z9dOd-d.Z:dOd/d0Z;dOd1d2Z<G d3d4 d4Z=G d5d6 d6e=Z>G d7d8 d8e=Z?dOd9d:Z@dOd;d<ZAd=d> ZBd?d@ ZCdAdB ZDe6 ZEeeEZFdCdD ZGdEdF ZHG dGdH dHZIG dIdJ dJejJZKG dKdL dLZLG dMdN dNee# ZMdS )P    )annotationsN)TupleListDictCallableTypeVar)	dataclass)TritonSemantic)KernelInterface)TensorDescriptor   )InterpreterError)partial   )interpreter)irTc                   @  s^   e Zd ZU dZded< ded< ejedZded< d	d
 Z	dd Z
dd Zdd Zdd ZdS )TensorHandlez
        data: numpy array
        dtype: triton type, either pointer_type or scalar_type.
        we don't store block_type here because the shape information is already available in the data field
        attr: a dictionary of attributes
    znp.arraydataztl.dtypedtype)default_factoryr   attrc                 C  s:   t | j| jstd| jjd  d| jj d| j d S )Nznumpy data itemsize (   z) bits) exceeds dtype primitive_bitwidth (z bits) for triton type )_validate_np_data_sizer   r   
ValueErroritemsizeprimitive_bitwidthself r   d/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/triton/runtime/interpreter.py__post_init__&   s   zTensorHandle.__post_init__c                 C  s   t | j S N)boolr   allr   r   r   r    __bool__+      zTensorHandle.__bool__c                 C  s$   | j }t|dr|j}t|ds|S )N
element_ty)r   hasattrr'   )r   r   r   r   r    get_element_ty.   s
   

zTensorHandle.get_element_tyc                 C  s   t | j | jS r"   )r   r   copyr   r   r   r   r    clone4      zTensorHandle.clonec                 C  s   || j |< d S r"   )r   )r   keyvaluer   r   r    set_attr7   r&   zTensorHandle.set_attrN)__name__
__module____qualname____doc____annotations__dataclassesfielddictr   r!   r%   r)   r+   r/   r   r   r   r    r      s   
 r   c                   @  s   e Zd Zdd Zdd ZdS )BlockPointerHandlec                 C  s(   || _ || _|| _|| _|| _|| _d S r"   )baseshapestridesoffsetsblock_shapeorder)r   r9   r:   r;   r<   r=   r>   r   r   r    __init__=   s   
zBlockPointerHandle.__init__c           	      C  s   | j  }|jd }t| j j| j}tj| jtd}t	t
| jD ]D}dgt
| j }| j| ||< | j| jt| j|  |}||| | j| j tj }||v rf||| j| jk @ |dk@ }q"t|| j jj}||fS )Nr   r   r   r   )r9   r)   r   npbroadcast_tor   r=   onesr#   rangelenr<   arangereshaper;   astypeuint64r:   r   r   scalar)	r   boundary_checkdtype_ttn_bytesptrsmasksdim
bcast_dimsoffr   r   r    materialize_pointersE   s   

" z'BlockPointerHandle.materialize_pointersN)r0   r1   r2   r?   rS   r   r   r   r    r8   ;   s    r8   c                   @  s(   e Zd Zddd	Zd
d ZdddZdS )TensorDescHandler9   r   r:   List[TensorHandle]r;   r=   	List[int]c                 C  s,   || _ t|| _|| _|| _|| _|| _d S r"   )r9   rE   ndimr:   r;   r=   padding)r   r9   r:   r;   r=   rX   r   r   r    r?   W   s   

zTensorDescHandle.__init__c                 C  s   | j j d dksJ dt| j| jksJ t| j| jks"J | jdks+J d| j jj}|j	d }| jd d D ]}|j | }|d dksOJ dq<| jd j dks^J d	d S )
N   r   zbase must be 16-byte alignedr   z"descriptor cannot be 0 dimensionalr   zstride must be 16-byte alignedzlast dim must be contiguous)
r9   r   itemrE   r;   rW   r=   r   r'   r   )r   	scalar_tyr   stridebyte_strider   r   r    validate`   s   

 zTensorDescHandle.validater<   c           	      C  s  t || jks	J | jjj}|jd }|d j| d dks"J dt| jj| j	}tj
| j	td}tt | j	D ]?}dgt | j	 }| j	| ||< || jt| j	|  |}||| | j| j tj }|d|k@ || j| jk @ }q:|jtjksJ t|| jjj}||fS )Nr   rZ   rY   r   z*block offset start must be 16-byte alignedr@   r   )rE   rW   r9   r   r'   r   r   rA   rB   r=   rC   r#   rD   rF   rG   r;   rH   rI   r:   r   rJ   )	r   r<   r\   r   rN   rO   rP   rQ   rR   r   r   r    rS   m   s   

  z%TensorDescHandle.materialize_pointersN)r9   r   r:   rU   r;   rU   r=   rV   )r<   rU   )r0   r1   r2   r?   r_   rS   r   r   r   r    rT   U   s    
	rT   T)frozenc                   @  s   e Zd ZU dZded< dZded< dZded< dZd	ed
< dZded< dZ	ded< dZ
d	ed< dZded< dZded< dZd	ed< dS )InterpreterOptionsNr7   extern_libsFr#   debugTsanitize_overflowstrarch)fp8e5fp8e5b16fp8e4nvfp8e4b8fp8e4b15z
Tuple[str]supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)rn   tf32x3ieeeallowed_dot_input_precisionsr   intmax_num_imprecise_acc_defaultr   backend_name)r0   r1   r2   rb   r4   rc   rd   rf   rl   rm   ro   rr   rt   ru   r   r   r   r    ra      s   
 ra   c                 C  s<   t |tjrdS | jd }|j}|dk rd}||krdS dS )NTr   F)
isinstancetlpointer_typer   r   )np_arraytl_dtypenp_dtype_bitwidthtl_dtype_bitwidthr   r   r    r      s   
r   c                 C  sD   | t jkrt jS | t jkrt jS | t jkrt jS | t jkr t jS | S r"   )	rA   uint8int8uint16int16uint32int32rI   int64r@   r   r   r    _get_signed_np_dtype   s   



r   c                 C  st  t | tjrttjS i tjtttjttjtj	ttj	tj
ttj
tjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttjtjttji}t | tjrt | jtjrttjS || j S ||  S r"   )rv   rw   rx   rA   r   rI   int1r#   float16float32float64r~   r}   r   r   r   r   r   bfloat16float8e5float8e5b16
float8e4nv
float8e4b8float8e4b15
block_typer'   )tt_dtypenp_typesr   r   r    _get_np_dtype   sX   	

r   c                 C  s  t td|j }t td|j }tj|  |d}||jd ? d@ }|j|j d }|j|j d }	|d|j> d @ }
|j}|j}||j? d|> d @ tj}|dk}t	|rtj
|tjd}t|jD ]}|
|? d@ }|j| ||dk< qh|
dk}d||  ||< || |||@ < |
| || > d|j> d @ |
|< tdt|| | d|	> d }||}||}|j|jkr|
|j|j ? d|j> d @ }|tjjkr|
d|j|j d > @ }||dk }||}n|
||j|j > d|j> d @ }|dk}t	|rH||j? d|> d @ tj}|dk}||@ }tj
|tjd}d| || |  ||< || || ? d|j||  > B ||< ||jd > ||j> B |B }|| jS )Nuintr@   r   r   )getattrrA   r   
frombuffertobytesfp_mantissa_widthexponent_biasrH   r   any
zeros_likerD   maximumminimum_irROUNDING_MODERTNErG   r:   )inputinput_dtypeoutput_dtyperounding_modeinput_uint_dtypeoutput_unint_dtype	input_binsigninput_exponent_widthoutput_exponent_widthsignificand
bias_inputbias_outputexponentsubnormal_indexbit_posi	bit_indexzero_significand_indexexponent_outputsign_outputsignificand_outputcut_offnon_zero_exponent_indexshiftoutputr   r   r    _convert_float   sl   
$


r   c                 C  s
   t | S r"   )matherfxr   r   r    _erf
  s   
r   c                 C  s   t | t | d? S )N@   )rs   )abr   r   r    
_umulhi_64  s   r   )otypesc                   @  s   e Zd Zedd ZdS )ExtraFunctionsc                 C  s   t |j| j|||S r"   )rw   tensorbuildercreate_fp_to_fphandle)r   dst_tyfp_downcast_rounding	_semanticr   r   r    _convert_custom_types  s   z$ExtraFunctions._convert_custom_typesN)r0   r1   r2   staticmethodr   r   r   r   r    r     s    r   c                   @  s  e Zd Zejjejjejjejjejjejjejj	ejj	iZ
ejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejjejji
ZdddZdd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(d%d& Z)d'd( Z*d)d* Z+d+d, Z,d-d. Z-d/d0 Z.d1d2 Z/d3d4 Z0d5d6 Z1d7d8 Z2d9d: Z3d;d< Z4d=d> Z5d?d@ Z6dAdB Z7dCdD Z8dEdF Z9dGdH Z:dIdJ Z;dKdL Z<dMdN Z=dOdP Z>dQdR Z?dSdT Z@dUdV ZAdWdX ZBdYdZ ZCd[dZ ZDd\dZ ZEd]dZ ZFd^dZ ZGd_dZ ZHd`dZ ZIdadb ZJdcdd ZKdedf ZLdgdZ ZMdhdZ ZNdidZ ZOdjdZ ZPdkdZ ZQdldZ ZRdmdZ ZSdndZ ZTdodZ ZUdpdZ ZVdqdZ ZWdrdZ ZXdsdZ ZYdtdZ ZZdudZ Z[dvdZ Z\dwdZ Z]dxdZ Z^dydZ Z_dzdZ Z`d{dZ Zad|dZ Zbd}dZ Zcd~dZ ZdddZ ZeddZ ZfddZ ZgddZ ZhddZ ZiddZ ZjddZ ZkddZ ZlddZ ZmddZ ZnddZ ZoddZ ZpddZ ZqddZ ZrddZ ZsddZ ZtddZ ZuddZ ZvddZ ZwddZ ZxddZ ZyddZ ZzddZ Z{ddZ Z|eKZ}eKZ~dd Zdd Zdd Zdd ZddZ ZddZ Zdd Zdd Zdd ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ ZddZ Zdd Zdd ZddZ Zdd Zdd Zdd Zdd Zdd Zdd ZddĄ ZddƄ ZddȄ Zddʄ Zdd̄ Zdd΄ ZddЄ Zdd҄ ZddԄ Zddք Zdd؄ Zddڄ Zdd܄ Zddބ Zdd Zdd Zdd Zdd Zdd Z	d	d
ddZdddZdddZdddZdddZdd ZdS (  InterpreterBuilderreturnNonec                 C  s2   d | _ t | _i | _tj| jd< dd | jd< d S )Nconvert_custom_typesc                 S  s   dS )N)r   r   r   r   )lhsTyperhsTyper   r   r    <lambda>;  s    z-InterpreterBuilder.__init__.<locals>.<lambda>min_dot_size)rf   ra   optionscodegen_fnsr   r   r   r   r   r    r?   6  s
   zInterpreterBuilder.__init__c                 C  sR   || j d k std|| j d k std|| j d k s!td|||f| _d S )Nr   zx >= grid_dim[0]r   zy >= grid_dim[1]r   zz >= grid_dim[2])grid_dimr   grid_idxr   r   yzr   r   r    set_grid_idx=  s   zInterpreterBuilder.set_grid_idxc                 C  s   |||f| _ d S r"   )r   )r   nxnynzr   r   r    set_grid_dimF     zInterpreterBuilder.set_grid_dimc                 C     t jS r"   )rw   r   r   r   r   r    get_half_tyK     zInterpreterBuilder.get_half_tyc                 C  r   r"   )rw   r   r   r   r   r    get_bf16_tyN  r   zInterpreterBuilder.get_bf16_tyc                 C  r   r"   )rw   r   r   r   r   r    get_float_tyQ  r   zInterpreterBuilder.get_float_tyc                 C  r   r"   )rw   r   r   r   r   r    get_double_tyT  r   z InterpreterBuilder.get_double_tyc                 C  r   r"   )rw   r   r   r   r   r    get_int1_tyW  r   zInterpreterBuilder.get_int1_tyc                 C  r   r"   )rw   r~   r   r   r   r    get_int8_tyZ  r   zInterpreterBuilder.get_int8_tyc                 C  r   r"   )rw   r}   r   r   r   r    get_uint8_ty]  r   zInterpreterBuilder.get_uint8_tyc                 C  r   r"   )rw   r   r   r   r   r    get_int16_ty`  r   zInterpreterBuilder.get_int16_tyc                 C  r   r"   )rw   r   r   r   r   r    get_uint16_tyc  r   z InterpreterBuilder.get_uint16_tyc                 C  r   r"   )rw   r   r   r   r   r    get_int32_tyf  r   zInterpreterBuilder.get_int32_tyc                 C  r   r"   )rw   r   r   r   r   r    get_uint32_tyi  r   z InterpreterBuilder.get_uint32_tyc                 C  r   r"   )rw   r   r   r   r   r    get_int64_tyl  r   zInterpreterBuilder.get_int64_tyc                 C  r   r"   )rw   rI   r   r   r   r    get_uint64_tyo  r   z InterpreterBuilder.get_uint64_tyc                 C  r   r"   )rw   r   r   r   r   r    get_fp8e4nv_tyr  r   z!InterpreterBuilder.get_fp8e4nv_tyc                 C  r   r"   )rw   r   r   r   r   r    get_fp8e4b15_tyu  r   z"InterpreterBuilder.get_fp8e4b15_tyc                 C  r   r"   )rw   r   r   r   r   r    get_fp8e4b8_tyx  r   z!InterpreterBuilder.get_fp8e4b8_tyc                 C  r   r"   )rw   r   r   r   r   r    get_fp8e5_ty{  r   zInterpreterBuilder.get_fp8e5_tyc                 C  r   r"   )rw   r   r   r   r   r    get_fp8e5b16_ty~  r   z"InterpreterBuilder.get_fp8e5b16_tyc                 C     t ||S r"   )rw   rx   )r   elt_ty
addr_spacer   r   r    
get_ptr_ty     zInterpreterBuilder.get_ptr_tyc                 C  r   r"   )rw   r   )r   r   r:   r   r   r    get_block_ty  r   zInterpreterBuilder.get_block_tyc                 C  s   t tj|gtjdtjS Nr@   )r   rA   arraybool_rw   r   r   r.   r   r   r    get_int1     zInterpreterBuilder.get_int1c                 C     t tj|gtjdtjS r   )r   rA   r   r}   rw   r  r   r   r    	get_uint8  r  zInterpreterBuilder.get_uint8c                 C  r  r   )r   rA   r   r~   rw   r  r   r   r    get_int8  r  zInterpreterBuilder.get_int8c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    
get_uint16  r  zInterpreterBuilder.get_uint16c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    	get_int16  r  zInterpreterBuilder.get_int16c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    
get_uint32  r  zInterpreterBuilder.get_uint32c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    	get_int32  r  zInterpreterBuilder.get_int32c                 C  r  r   )r   rA   r   rI   rw   r  r   r   r    
get_uint64  r  zInterpreterBuilder.get_uint64c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    	get_int64  r  zInterpreterBuilder.get_int64c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    get_fp16  r  zInterpreterBuilder.get_fp16c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    get_fp32  r  zInterpreterBuilder.get_fp32c                 C  r  r   )r   rA   r   r   rw   r  r   r   r    get_fp64  r  zInterpreterBuilder.get_fp64c                 C  s   t tjdgt|d|S Nr   r@   )r   rA   r   r   )r   typer   r   r    get_null_value  r  z!InterpreterBuilder.get_null_valuec                 C  s2   | j d u r	tdttj| j | gtjdtjS )Nzgrid_idx is Noner@   )r   r   r   rA   r   r   rw   r   axisr   r   r    create_get_program_id  s   
 z(InterpreterBuilder.create_get_program_idc                 C  s    t tj| j| gtjdtjS r   )r   rA   r   r   r   rw   r  r   r   r    create_get_num_programs  s    z*InterpreterBuilder.create_get_num_programsc                 C  s0   t tj|jtdtj}d }| ||||||S r   )r   rA   	ones_liker   r#   rw   r   create_masked_load)r   ptr_0_1is_volatilemaskotherr   r   r    create_load  s   zInterpreterBuilder.create_loadc                 C  s*   t tj|jtdtj}| |||d d S r   )r   rA   r  r   r#   rw   r   create_masked_store)r   r  valr  r  r  r   r   r    create_store  s   zInterpreterBuilder.create_storec           
      C  sN   |  }t|}|d u rttj|j|d|}t|j|j|j|}	t|	|S r   )r)   r   r   rA   r   r   _interpreterload)
r   rN   r  r  cache_modifiereviction_policyr  rL   dtype_npretr   r   r    r    s   
z%InterpreterBuilder.create_masked_loadc                 C  s   t |j|j|jS r"   )r#  storer   )r   rN   r.   r  r%  r&  r   r   r    r        z&InterpreterBuilder.create_masked_storec                 C  st   |j j}|j}|tjkr|tjks|tjkr.|tjkr.t|j||d t|}t	||jS t	|j
t||jS r"   )r   rJ   rw   r   r   r   r   viewr   r   rH   )r   srcdst_typesrc_element_typedst_element_typer   r   r   r    	cast_impl  s   zInterpreterBuilder.cast_implc                 C     |  ||S r"   r0  r   r,  r-  r   r   r    r         zInterpreterBuilder.<lambda>c                 C  r1  r"   r2  r3  r   r   r    r     r4  c                 C  r1  r"   r2  r3  r   r   r    r     r4  c                 C  r1  r"   r2  r3  r   r   r    r     r4  c                 C  r1  r"   r2  r3  r   r   r    r     r4  c                 C  r1  r"   r2  r3  r   r   r    r     r4  c                 C  r1  r"   r2  )r   r,  r-  	is_signedr   r   r    r     r4  c                 C  s4   |j j}|j}t|j|||t|}t||jS r"   )r   rJ   r   r   r+  r   r   )r   r,  r-  r   r.  r/  r   r   r   r    r     s   z"InterpreterBuilder.create_fp_to_fpc                 C  s   t |jt||jS r"   )r   r   r+  r   rJ   r3  r   r   r    create_bitcast     z!InterpreterBuilder.create_bitcastc                 C  s8   ||j |j }|jj}t||s|t|}t||S r"   r   r   rJ   r   rH   r   r   )r   lhsrhsopr   rz   r   r   r    	binary_op  s
   

zInterpreterBuilder.binary_opc                 C     |  ||tjS r"   r<  rA   addr   r9  r:  r   r   r    r         c                 C  r=  r"   r<  rA   multiplyr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   divider@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   fmodr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   subtractr@  r   r   r    r     rA  c                 C  r=  r"   rB  r@  r   r   r    r     rA  c                 C  r=  r"   rD  r@  r   r   r    r     rA  c                 C  r1  r"   create_idivr@  r   r   r    r     r4  c                 C  r1  r"   rJ  r@  r   r   r    r     r4  c                 C  r=  r"   rF  r@  r   r   r    r     rA  c                 C  r=  r"   rF  r@  r   r   r    r     rA  c                 C  r=  r"   r>  r@  r   r   r    r     rA  c                 C  r=  r"   rH  r@  r   r   r    r     rA  c                 C  r=  r"   )r<  rA   
left_shiftr@  r   r   r    r      rA  c                 C  r=  r"   )r<  rA   right_shiftr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   r   r@  r   r   r    r     rA  c                 C  r=  r"   rN  r@  r   r   r    r     rA  c                 C  r=  r"   rN  r@  r   r   r    r     rA  c                 C  r=  r"   rN  r@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   r   r@  r   r   r    r     rA  c                 C  r=  r"   rO  r@  r   r   r    r     rA  c                 C  r=  r"   rO  r@  r   r   r    r     rA  c                 C  r=  r"   rO  r@  r   r   r    r   	  rA  c                 C  r=  r"   r<  rA   
less_equalr@  r   r   r    r   
  rA  c                 C  r=  r"   r<  rA   lessr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   greater_equalr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   greaterr@  r   r   r    r     rA  c                 C  r=  r"   rP  r@  r   r   r    r     rA  c                 C  r=  r"   rR  r@  r   r   r    r     rA  c                 C  r=  r"   rT  r@  r   r   r    r     rA  c                 C  r=  r"   rV  r@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   equalr@  r   r   r    r     rA  c                 C  r=  r"   r<  rA   	not_equalr@  r   r   r    r     rA  c                 C  r=  r"   rR  r@  r   r   r    r     rA  c                 C  r=  r"   rV  r@  r   r   r    r     rA  c                 C  r=  r"   rP  r@  r   r   r    r     rA  c                 C  r=  r"   rT  r@  r   r   r    r     rA  c                 C  r=  r"   rX  r@  r   r   r    r     rA  c                 C  r=  r"   rZ  r@  r   r   r    r     rA  c                 C  r=  r"   rR  r@  r   r   r    r     rA  c                 C  r=  r"   rV  r@  r   r   r    r     rA  c                 C  r=  r"   rP  r@  r   r   r    r     rA  c                 C  r=  r"   rT  r@  r   r   r    r     rA  c                 C  r=  r"   rX  r@  r   r   r    r     rA  c                 C  r=  r"   rZ  r@  r   r   r    r     rA  c                 C  r=  r"   )r<  rA   bitwise_andr@  r   r   r    r      rA  c                 C  r=  r"   )r<  rA   bitwise_xorr@  r   r   r    r   !  rA  c                 C  r=  r"   )r<  rA   
bitwise_orr@  r   r   r    r   "  rA  c                 C  s&   t |jt|j|j |j |jjS r"   )r   r   rA   rG  r   rJ   r@  r   r   r    rK  &  s   &zInterpreterBuilder.create_idivc                 C  sD   t |jj}t |jj}|j||_|j||_| ||tjS r"   )r   r   r   rH   r<  rA   rM  )r   r9  r:  	lhs_dtype	rhs_dtyper   r   r    create_ashr,  s
   zInterpreterBuilder.create_ashrc                 C  s   |j j}|tjks|tjkrtt|j |j |jjS ttd|j	d d  }|j 
|}|j 
|}t|||j	d ? }t|
||jjS )Nr   r   r   )r   r   rA   r   rI   r   np_umulhi_u64rJ   r   r   rH   rC  )r   r9  r:  r   compute_dtypelhs_datarhs_dataret_datar   r   r    create_umulhi4  s   z InterpreterBuilder.create_umulhic                 C  s<   ||j |j |j }|jj}t||s|t|}t||S r"   r8  )r   r9  r:  r  r;  r   rz   r   r   r    
ternary_op@  s
   

zInterpreterBuilder.ternary_opc                 C     |  |||tjS r"   )rh  rA   clip)r   arglohipropagate_nansr   r   r    r   I      c                 C  ri  r"   )rh  rA   where)r   condr9  r:  r   r   r    r   J  ro  c                 C  s   t |j|j |j |jjS r"   r   r   r   rJ   r   r   r   r    
create_fmaL     zInterpreterBuilder.create_fmac                 C  s   t ||j|jjS r"   rr  )r   rk  r;  r   r   r    unary_opP  r*  zInterpreterBuilder.unary_opc                 C  sZ   |j }|jd }ttd|j }|j|}d|> d }||@ t|}t||j jS )Nr   r   )	r   r   r   rA   r   r+  r   r   rJ   )r   rk  rL   mask_bitwidthnp_uint_dtyper   r  r(  r   r   r    create_fabsS  s   
zInterpreterBuilder.create_fabsc                 C     |  |tjS r"   )ru  rA   cosr   rk  r   r   r    r   ]      c                 C  ry  r"   )ru  rA   expr{  r   r   r    r   ^  r|  c                 C  ry  r"   )ru  rA   exp2r{  r   r   r    r   _  r|  c                 C  ry  r"   )ru  rA   absr{  r   r   r    r   `  r|  c                 C  ry  r"   )ru  rA   floorr{  r   r   r    r   a  r|  c                 C  ry  r"   )ru  rA   ceilr{  r   r   r    r   b  r|  c                 C  ry  r"   )ru  rA   logr{  r   r   r    r   c  r|  c                 C  ry  r"   )ru  rA   log2r{  r   r   r    r   d  r|  c                 C  ry  r"   ru  rA   sqrtr{  r   r   r    r   e  r|  c                 C  ry  r"   r  r{  r   r   r    r   f  r|  c                 C  ry  r"   )ru  rA   sinr{  r   r   r    r   g  r|  c                 C  s0   |j jtjkrt|j nt|j }t||jjS r"   )r   r   rA   r   np_erf_fp32np_erf_fp64r   rJ   )r   rk  r(  r   r   r    
create_erfi  s   "zInterpreterBuilder.create_erfc                 C  s   t dt|j |jjS Nr   )r   rA   r  r   r   rJ   r{  r   r   r    create_rsqrtm  r  zInterpreterBuilder.create_rsqrtc                 C  s   t |j||jjS r"   )r   r   rG   r   rJ   )r   rk  r:   allow_reorderr   r   r    r   q  s    c                 C     t t|j||jjS r"   )r   rA   	transposer   r   rJ   )r   rk  permr   r   r    create_transs  r7  zInterpreterBuilder.create_transc                 C  s   |j }|j }|jjdkr|j s|jjdkr6|j r6t||jtjd tj}t||jtjd tj}t	tj
|||j jd|j  |jjS )Nr   r@   )r   r   r   is_floatingr   rw   r   r+  rA   r   matmulrJ   )r   r   r   dinput_precisionmax_num_imprecise_acca_datab_datar   r   r    
create_dotv  s   $zInterpreterBuilder.create_dotc                 C  s   t tj||tjdtjS r   )r   rA   rF   r   rw   )r   ret_tystartstopr   r   r    create_make_range  r  z$InterpreterBuilder.create_make_rangec                 C  s   |d u rt tj|jtdtj}tj|j|jjd}t|j|jt	|j}tj
||d|f|dd }|d  t|j 8  < t |tjS )Nr@   r   )binsrD   weights)r   rA   r  r   r#   rw   r   r   rp  r   	histogramlogical_notsumr   )r   r   r  r  dummy_weightsr  r   r   r    create_histogram  s   z#InterpreterBuilder.create_histogramc                 C  s   t tj|j|j|d|jjS )Nr  )r   rA   take_along_axisr   r   rJ   )r   r,  indicesr  r   r   r    create_gather  s   z InterpreterBuilder.create_gatherc                 C  s<   |  }|j}td|d }t|j||jtj  |jS )Nr   r   )	r)   r   maxr   r   rH   rA   rI   r   )r   r  offsetrL   element_bitwidthelement_bytewidthr   r   r    create_addptr  s    z InterpreterBuilder.create_addptrc                 C  s   | |\}}| }	t|	}
|d u rd }n.|tjjkr(ttj|j	|
d|	}n|tjj
kr=ttj|j	td|
d|	}ntd| | ||||||S )Nr@   nanzunsupported padding option )rS   r)   r   r   PADDING_OPTIONPAD_ZEROr   rA   r   r   PAD_NAN	full_likefloatr   r  )r   r  rK   padding_optionr%  r&  r  rN   rO   rL   r'  r  r   r   r    create_tensor_pointer_load  s   z-InterpreterBuilder.create_tensor_pointer_loadc                 C  s    | |\}}| |||||S r"   rS   r   )r   r  r.   rK   r%  r&  rN   rO   r   r   r    create_tensor_pointer_store     z.InterpreterBuilder.create_tensor_pointer_storec                 C  r  r"   )r   rA   expand_dimsr   r   rJ   )r   rk  r  r   r   r    create_expand_dims  r7  z%InterpreterBuilder.create_expand_dimsc                 C  r  r"   )r   rA   rB   r   r   rJ   )r   rk  r:   r   r   r    create_broadcast  r7  z#InterpreterBuilder.create_broadcastc                 C  s   t t|j|jg|jjS r"   )r   rA   concatenater   r   rJ   r@  r   r   r    
create_cat  rt  zInterpreterBuilder.create_catc                 C  s    t tj|j|jgdd|jjS )NrZ   r  )r   rA   stackr   r   rJ   r@  r   r   r    create_join  s    zInterpreterBuilder.create_joinc                 C  s(   t |jd |jjt |jd |jjfS )N).r   ).r   rr  )r   r!  r   r   r    create_split  s   (zInterpreterBuilder.create_splitc                 C  s\   |j }t|jtjrttj||jd t	|jd|jj
S ttj||jt	|jd|jj
S r  )r:   rv   r   rw   r   r   rA   fullr   r   rJ   )r   r  rk  r:   r   r   r    create_splat  s   &"zInterpreterBuilder.create_splatc                 C  s&   t tjd|jd t|jd|jjS )Nr   r   r@   )r   rA   r  r   r   r   rJ   r{  r   r   r    create_unsplat  s   &z!InterpreterBuilder.create_unsplatc                 C  sB   || j vrtd| | j | }tt|j|j|j||jjS )Nunsupported semantic )ir_sem_to_interpreter_semr   r   r#  
atomic_casr   r   rJ   )r   r  cmpr!  semscoper   r   r    create_atomic_cas  s   

 z$InterpreterBuilder.create_atomic_casc                 C  sf   || j vrtd| || jvrtd| | j | }| j| }tt||j|j|j||jjS )Nzunsupported rmwOp r  )	ir_rmw_op_to_interpreter_rmw_opr   r  r   r#  
atomic_rmwr   r   rJ   )r   rmwOpr  r!  r  r  r  r   r   r    create_atomic_rmw  s   



"z$InterpreterBuilder.create_atomic_rmwc                 C     t d)Nz4extern_elementwise not supported in interpreter modeNotImplementedError)r   libNamelibPathsymbolargListretTypeisPurer   r   r    create_extern_elementwise     z,InterpreterBuilder.create_extern_elementwisec                 C  r  )Nz,inline_asm not supported in interpreter moder  )r   	inlineAsmconstraintsvaluesr  r  packr   r   r    create_inline_asm  r  z$InterpreterBuilder.create_inline_asmc                 C  s   d| j d  d| j d  d| j d  d}|r|d| 7 }|r*tjdd	d
 id |D ]}t|d|j   q,|rCtjd d d S d S )N(r   z, r   r   ) r$   c                 S  s   d| dS )N0x02xr   r   r   r   r    r     r4  z1InterpreterBuilder.create_print.<locals>.<lambda>)	formatter)r   rA   set_printoptionsprintr   )r   prefixhexr  isSignedmsgr.   r   r   r    create_print  s   *zInterpreterBuilder.create_printc                 C  s   |sJ | d S r"   r   )r   	conditionmessager   r   r    create_assert  s   z InterpreterBuilder.create_assertc                 C  s   |sJ dd S )NzAssume failedr   )r   r  r   r   r    create_assume  r   z InterpreterBuilder.create_assumec                 C  s   d S r"   r   r   r   r   r    create_barrier  s   z!InterpreterBuilder.create_barrierc                 C  s    dd |D }t ||||||S )Nc                 S     g | ]}|  qS r   r+   .0r  r   r   r    
<listcomp>      z<InterpreterBuilder.create_make_block_ptr.<locals>.<listcomp>)r8   )r   r9   r:   r;   r<   r=   r>   new_offsetsr   r   r    create_make_block_ptr  s   z(InterpreterBuilder.create_make_block_ptrc                 C  sv   t |jt |krtddd |jD }t|j|j|j||j|j}t	t |D ]}|j|  j
|| j
7  _
q)|S )Nz len(ptr.offsets) != len(offsets)c                 S  r  r   r  r  r   r   r    r    r  z5InterpreterBuilder.create_advance.<locals>.<listcomp>)rE   r<   r   r8   r9   r:   r;   r=   r>   rD   r   )r   r  r<   r  r(  r   r   r   r    create_advance  s   z!InterpreterBuilder.create_advancezeror9   r   r:   rU   r;   tensor_shaperV   r5  r#   rX   re   c                 C  s   t |||||}|  |S r"   )rT   r_   )r   r9   r:   r;   r  r5  rX   descr   r   r    create_make_tensor_descriptor  s   z0InterpreterBuilder.create_make_tensor_descriptorr  rT   r  c                 C  s   t |tsJ ||\}}| }t|}|j}	|	tjjkr+t	t
j|j|d|}
n|	tjjkr@t	t
j|jtd|d|}
ntd|	 | j|||
||ddS )Nr@   r  zunsupported padding F)r%  r&  r  )rv   rT   rS   r)   r   rX   r   r  r  r   rA   r   r   r  r  r  r   r  )r   r  r  r%  r&  rN   r  rL   r'  rX   r  r   r   r    create_descriptor_load  s   z)InterpreterBuilder.create_descriptor_loadr.   c                 C  s    | |\}}| |||d d S r"   r  )r   r  r.   r  rN   r  r   r   r    create_descriptor_store"  r  z*InterpreterBuilder.create_descriptor_store	x_offsetsy_offsetc                 C  s   |j jj}t|}tj|jjd |jd g|d}d }d }	t	|jD ]\}
}t
|tj|g}| ||||	j||
d d f< q"t
||S )Nr   rZ   r@   )r9   r   r'   r   rA   zerosr   r:   r=   	enumerater   rw   r   r  )r   r  r   r  r  r   np_dtyperesultr%  r&  r   x_offsetr  r   r   r    create_descriptor_gather&  s   
  
z+InterpreterBuilder.create_descriptor_gatherc           	      C  sH   t |jD ]\}}t|j| |j}t|tj|g}| ||| qd S r"   )r  r   r   r   rw   r   r  )	r   r  r.   r   r  r   r  slicer  r   r   r    create_descriptor_scatter1  s
   z,InterpreterBuilder.create_descriptor_scatterc                 C  sZ   t |}d|jv rttjdd|d|jS |tjkr&ttjdd|d|jS td| )Nrs   r   rZ   r@   Tzunsupported type )r   namer   rA   r  rJ   r   	TypeError)r   r  np_typer   r   r    get_all_ones_value8  s   

z%InterpreterBuilder.get_all_ones_valueNr   r   )r  )r9   r   r:   rU   r;   rU   r  rV   r5  r#   rX   re   )r  rT   r  rU   )r  rT   r.   r   r  rU   )r  rT   r   r   r  r   )r  rT   r.   r   r   r   r  r   )r0   r1   r2   r   MEM_SEMANTICACQUIREr#  RELEASERELAXEDACQUIRE_RELEASEr  	ATOMIC_OPADDRMW_OPFADDMINUMINMAXUMAXANDORXORXCHGr  r?   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r	  r
  r  r  r  r  r  r  r  r  r  r"  r  r   r0  create_si_to_fpcreate_ui_to_fpcreate_fp_to_sicreate_fp_to_uicreate_fp_extcreate_fp_trunccreate_int_castr   r6  r<  create_faddcreate_fmulcreate_fdivcreate_fremcreate_fsub
create_mulcreate_precise_divfcreate_sdivcreate_udivcreate_sremcreate_urem
create_add
create_sub
create_shlcreate_lshrcreate_minsicreate_minuicreate_minimumfcreate_minnumfcreate_maxsicreate_maxuicreate_maximumfcreate_maxnumfcreate_icmpSLEcreate_icmpSLTcreate_icmpSGEcreate_icmpSGTcreate_icmpULEcreate_icmpULTcreate_icmpUGEcreate_icmpUGTcreate_icmpEQcreate_icmpNEcreate_fcmpOLTcreate_fcmpOGTcreate_fcmpOLEcreate_fcmpOGEcreate_fcmpOEQcreate_fcmpONEcreate_fcmpULTcreate_fcmpUGTcreate_fcmpULEcreate_fcmpUGEcreate_fcmpUEQcreate_fcmpUNE
create_and
create_xor	create_orcreate_int_to_ptrcreate_ptr_to_intrK  ra  rg  rh  create_clampfcreate_selectrs  ru  rx  
create_cos
create_expcreate_exp2create_iabscreate_floorcreate_ceil
create_logcreate_log2create_precise_sqrtcreate_sqrt
create_sinr  r  create_reshaper  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  r   r   r   r    r   !  s^   	
		
		r   c                   @  s.   e Zd ZdZdddZdddZdddZdS )_LangPatchScopez2Tracks patched attributes so they can be restored.r   r   c                 C  s
   g | _ d S r"   )_changesr   r   r   r    r?   H  s   
z_LangPatchScope.__init__objobjectr
  re   r.   c                 C  s.   t ||t}| j|||f t||| d S r"   )r   _MISSINGrh  appendsetattr)r   ri  r
  r.   originalr   r   r    r/   K  s   z_LangPatchScope.set_attrc                 C  sD   | j r | j  \}}}|tu rt|| nt||| | j sd S d S r"   )rh  poprk  delattrrm  )r   ri  r
  rn  r   r   r    restoreP  s   z_LangPatchScope.restoreNr  )ri  rj  r
  re   r.   rj  r   r   )r0   r1   r2   r3   r?   r/   rq  r   r   r   r    rg  E  s
    

rg  r  c                   s,   t | |d fdd
}|| || d S )N)memberc                   s$   | |i dd |  D d iS )Nc                 S  s   i | ]\}}|d kr||qS )r   r   r  kvr   r   r    
<dictcomp>\  s
    z1_patch_attr.<locals>.<lambda>.<locals>.<dictcomp>r   items)rr  argskwargssemanticr   r    r   [  s    z_patch_attr.<locals>.<lambda>)r	   r/   )ri  r
  rr  r   r  
new_memberr   r{  r    _patch_attrY  s   r~  c                 C  s4   t | D ]\}}tj|rt| |||| qd S r"   )inspect
getmembersrw   core
is_builtinr~  )pkgr   r  r
  rr  r   r   r    _patch_builtinb  s
   r  c                   sr   dd  dd }| | ddd  | | d fd	d | | d
dd  | | ddd  | | dt| d S )Nc                 S  s   | j j}|jdkrt|S dS )Nr   T)r   r   sizer#   )r   r   r   r   r    	_get_boolj  s   z%_patch_lang_tensor.<locals>._get_boolc                 S  sj   t t| jj| jj}| j sJ t| jj	}|d |d |d< |d< t
j| j|}t
j||S )NrZ   )r   rA   r  r   r   r   r  is_blocklistr:   rw   r  r   r   )r   r   r=   res_tyr   r   r    _get_transposep  s   z*_patch_lang_tensor.<locals>._get_transpose	__index__c                 S     t | jjS r"   )rs   r   r   r   r   r   r    r   x  r4  z$_patch_lang_tensor.<locals>.<lambda>r%   c                   s    | S r"   r   r   r  r   r    r   y  s    __repr__c                 S  r  r"   )reprr   r   r   r   r   r    r   z  r4  __str__c                 S  r  r"   )re   r   r   r   r   r   r    r   {  r4  r   )r/   property)r   r  r  r   r  r    _patch_lang_tensorh  s   r  c                   @  s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )ReduceScanOpInterfacec                 C  s   || _ || _d S r"   )r  
combine_fn)r   r  r  r   r   r    r?     s   
zReduceScanOpInterface.__init__c                 C  s0   |d ur|t |krtd| d| d S d S )Nzaxis z out of bounds for shape )rE   r   )r   r:   r  r   r   r    
check_axis  s   z ReduceScanOpInterface.check_axisc                 C  s>   |D ]}t |tjjstdt| | |j| j qd S )Nzinput must be a tensor, got )	rv   rw   r  r   r   r  r  r:   r  )r   r   rk  r   r   r    check_tensor  s
   z"ReduceScanOpInterface.check_tensorc                 C  s`   t |}t|dr|jr||}t|t|j}n
tj|g|d}|}tj	
t||j|S )Nr:   r@   )r   r(   r:   rH   rw   r   r  rA   r   r  r   r   rJ   )r   r(  r   r  ret_typer   r   r    	to_tensor  s   
zReduceScanOpInterface.to_tensorc                 C  sJ   t |ts| |fd S | | | |}t |ttfr"t|S |fS Nr   )rv   tupleapplyr  
apply_implr  )r   r   r(  r   r   r    r    s
   


zReduceScanOpInterface.applyN)r0   r1   r2   r?   r  r  r  r  r   r   r   r    r    s    
r  c                      sF   e Zd Z fddZdd Zdd Zddd	Zd
d Zdd Z  Z	S )	ReduceOpsc                      t  || || _d S r"   )superr?   	keep_dims)r   r  r  r  	__class__r   r    r?        
zReduceOps.__init__c                 C  sN   g }|D ]}|d ur| | qd}| | |jj |j qt||fS r  )rl  r  r   r   flattenr   r  )r   r   r  r(  r   r   r   r    unravel  s   zReduceOps.unravelc                   s  j } j \ }g }g } d jjj}|d| ||d d   } D ]}||jj |tj||jjjd q't	|d j
D ]}	t|	|d| |d d   t fddt|D }
| dkrt	t|D ]}|
| jj || < qvqEt fddt|D }jjg ||
R  }t|ts|fn|}t	t|D ]}t|| tjjr|| jj n|| || < qqEg }t|D ]6\}	}jr|d urt||}nt	t|D ]}t|d}qn|d u r| }|| |	 j q|S )Nr   r   r@   c                 3  *    | ]\}} |  | jV  qd S r"   r  r   r  iir  )r   input_indexr   r   r    	<genexpr>     ( z+ReduceOps.generic_reduce.<locals>.<genexpr>c                 3  r  r"   r  r  oio)r   output_indexr   r   r    r    r  )r  r  r   r   r:   rl  rA   r  r   rD   r  unravel_indexr  r  rE   r[   r  fnrv   rw   r  r   r  r  r  )r   r   original_axisr  
input_dataoutput_datainput_shapeoutput_shaperk  r   input_tuplej	acc_tuplecombine_fn_retr(  r   _r   )r   r  r  r   r    generic_reduce  sN   zReduceOps.generic_reduceNc                 C  s   t |tr	|d n|}d }d }|r!| ||jj| j| jd|j}|r3| ||jj| j| jdtj	}|d ur?|d ur?||fS |d urE|S |d urK|S t
d)Nr   r  keepdimsz-val_reduce_op and idx_reduce_op are both None)rv   r  r  r   r   r  r  r   rw   r   r   )r   r   val_reduce_opidx_reduce_opr!  idxr   r   r    min_max  s     zReduceOps.min_maxc                 C  s"   |  tj|jj| j| jd|jS )Nr  )r  rA   r  r   r   r  r  r   r   r   r   r   r    r       "zReduceOps.sumc                 C  s   | j tjjkr| j|d tjtjdS | j tjjkr&| j|d tj	tj
dS | j tjjkr8| j|d tjd dS | j tjjkrJ| j|d tjd dS | j tjjkrX| |d S | |S )Nr   )r  r  )r  rw   standard_argmin_combine_tie_break_leftr  rA   minargmin_argmax_combine_tie_break_leftr  argmax_elementwise_maxnanmax_elementwise_minnanmin_sum_combiner  r  r  r   r   r    r    s   
zReduceOps.apply_implr"   )
r0   r1   r2   r?   r  r  r  r  r  __classcell__r   r   r  r    r    s    

+r  c                      s<   e Zd Z fddZdd Zdd Zdd Zd	d
 Z  ZS )ScanOpsc                   r  r"   )r  r?   reverse)r   r  r  r  r  r   r    r?     r  zScanOps.__init__c                 C  "   | j tj|jj| jd|jdgS Nr  r@   )r  rA   cumsumr   r   r  r   r  r   r   r    r    r  zScanOps.cumsumc                 C  r  r  )r  rA   cumprodr   r   r  r   r  r   r   r    r  
  r  zScanOps.cumprodc                   s  g }g }d j jj}D ]}||j j |tj||j jjd qt|d jD ]}t	|| t
 fddt|D } j dkr_tt|D ]}|| j j ||  < qOq+t
 fddtt D t
fddt|D }	jjg |	|R  }
t|
t
s|
fn|
}	tt|D ]}t|	| tjjr|	| j j n|	| ||  < qq+g }t|D ]\}}||| j q|S )Nr   r@   c                 3  s*    | ]\}} |  | jV  qd S r"   r  r  )indexr   r   r   r    r    r  z'ScanOps.generic_scan.<locals>.<genexpr>c                 3  s.    | ]}|j kr | d  n | V  qdS )r   Nr  )r  r   )r  r   r   r    r    s   , c                 3  r  r"   r  r  )r   
prev_indexr   r   r    r    r  )r   r   r:   rl  rA   r  r   rD   r  r  r  r  r  rE   r[   r  r  rv   rw   r  r   r  )r   r   r  r  r:   rk  r   r   r  r  r  r(  r   )r  r   r  r   r    generic_scan  s8    zScanOps.generic_scanc              	   C  s   g }| j r|D ]}|| tj|jj| jd|j qn|}| j	t
jjkr.| |d }n| j	t
jjkr=| |d }n| |}| j rV|D ]}tj|jj| jd|j_qG|S )Nr  r   )r  rl  r  rA   flipr   r   r  r   r  rw   r  r  r  _prod_combiner  r  )r   r   	new_inputrk  r(  r   r   r    r  +  s   &
zScanOps.apply_impl)	r0   r1   r2   r?   r  r  r  r  r  r   r   r  r    r    s    r  c                 C  sT   ddd}ddd}|  td| |  td| |  tjd| |  tjd| d S )	NFc                 [     t |||| S r"   )r  r  )r   r  r  r  rz  r   r   r    _new_reduceC  r,   z'_patch_reduce_scan.<locals>._new_reducec                 [  r  r"   )r  r  )r   r  r  r  rz  r   r   r    	_new_scanF  r,   z%_patch_reduce_scan.<locals>._new_scanreduceassociative_scan)F)r/   rw   r  )r  r  r  r   r   r    _patch_reduce_scan?  s   

r  c                 C  s   dd }ddd}ddd}dd	 }| | d
| | | d| | | d| | | dt | | jd| | | dt|dd | | dt|dd | | dt|dd t| d S )Nc                 S  sB  | j dkr	| S | j dkr| S | j dkr| S | j dkr$| S | j dkr-| S | j dkr6| S | j dkr?| S | j dkrH| S | j d	krQ|	 S | j d
krZ|
 S | j dkrc| S | j dkrl| S | j dkru| S | j dkr~| S | j dkr| S | j dkr| S | j dkr| S td|  d)Nvoidr   r~   r}   r   r   r   r   r   rI   rg   ri   rk   fp16bf16fp32fp64zfail to convert z to ir type)r
  get_void_tyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r    
_new_to_irQ  sF   
















z$_patch_lang_core.<locals>._new_to_irc                 [  s6   |d u rd}|d u rd| }}n| |}}t |||S )Nr   r   )rD   )arg1arg2steprz  r  endr   r   r    
_new_rangey  s   
z$_patch_lang_core.<locals>._new_range c                 S  s   | sJ |d S r"   r   )rq  r  r   r   r    _new_static_assert  r   z,_patch_lang_core.<locals>._new_static_assertc                 S  sn   t | tjs| S t |ttfs|gn|}dd |D }t|tdt| jkr.td| | j	
|| | S )Nc                 S  s"   g | ]}t |tjr|jn|qS r   )rv   rw   	constexprr.   r  ru  r   r   r    r    s   " z7_patch_lang_core.<locals>._set_attr.<locals>.<listcomp>r   z$len(values) != len(input.shape) for )rv   rw   r   r  r  rE   r  r:   r   r   r/   )r   r  r
  r   r   r    	_set_attr  s   z#_patch_lang_core.<locals>._set_attrrD   static_rangestatic_assertstatic_printto_irmultiple_ofztt.divisibilityr
  max_contiguousztt.contiguitymax_constancyztt.constancy)NN)r  )r/   r  r   r   r  )langr  r  r  r  r  r   r   r    _patch_lang_coreO  s   
(
	r  c                 C  s   t  }dd | j D }t|dksJ d|D ]%}t|t| t|jt| |tkr3t|jt| t	|j| t
|| qttjjt| |S )Nc                 S  s,   g | ]\}}t |r|ttjfv r|qS r   )r  ismodulerw   r  )r  r  r.   r   r   r    r    s   , z_patch_lang.<locals>.<listcomp>r   z:triton.language must be visible from within jit'd function)rg  __globals__rx  rE   r  interpreter_builderr   rw   r   r  r  r  tensor_descriptor_base)r  r  langsr  r   r   r    _patch_lang  s   r  c                 C  s"   t | drt| | S t| |S )N_fields)r(   r  )rk  contentsr   r   r    _tuple_create  s   "r  c                 C  s  t | trkttjj| d }tj	}d|   krdk r#n ntj	}n7d|   kr-dk r3n ntj
}n'd|   kr=dk rCn ntj}nd|   krMdk rSn ntj}ntd|  ttj| g|d|}t||S t| d	rttjj| d }ttj|  gtjd|}t||S t | trt| tt| S t | trd
d | jD }| jd dksJ td|d< tt }|jt| jdd | jD |dd | jD | j dS | S )Ni   l        l        l         l            l            zUnsupported integer value r@   data_ptrc                 S     g | ]}t |qS r   _implicit_cvtr  sr   r   r    r    r  z!_implicit_cvt.<locals>.<listcomp>rZ   r   c                 S  r
  r   r  r  r   r   r    r    r  c                 S  s   g | ]}t |qS r   )rw   r  )r  r   r   r   r    r    s    )r9   r:   r;   r=   r  )!rv   rs   rw   	str_to_tytritonruntimejitmangle_typerA   r   r   r   rI   r   r   r   r   r(   r	  r  r  mapr  r   r;   r  r	   r   make_tensor_descriptorr9   r:   r=   rX   )rk  tyr   r   r;   r|  r   r   r    r    sB   




r  c                 C  s   t | tjjjr| jS | S r"   )rv   r  r  r  TensorWrapperr9   )tr   r   r    _unwrap_tensor  s   r  c                 C  s&   t |tjjjrtjj| |jS | S r"   )rv   r  r  r  r  r   )r  original_tensorr   r   r    _rewrap_tensor  s   r  c                   @  s0   e Zd Zg fddZdd Zdd Zdd Zd	S )
GridExecutorc                   sT   ddl m || _|| _|| _|| _fdd|j D   fdd|D | _d S )Nr   _normalize_tyc                   s   i | ]	\}}| |qS r   r   )r  r
  r  r  r   r    rv    s    z)GridExecutor.__init__.<locals>.<dictcomp>c                   s   g | ]}  |d kr|qS )r  )get)r  r
  )r4   r   r    r    s    z)GridExecutor.__init__.<locals>.<listcomp>)	r  r  r  	arg_namesgridpre_run_hooksr4   rx  
constexprs)r   r  r   r!  r"  r   )r4   r  r    r?     s   zGridExecutor.__init__c                   sN   i  fdd  fdd|D }i }|  D ]
\}} |||< q||fS )Nc                   s   t | trt| t | S t | tr!t | j| j| j| j| j	S t
| ds(| S t| }|  vr@| }| | < |   }|jddd}||| | |  t|| d}|S )Nr	  r   cpu)device)r  )rv   r  r  r  r   r9   r:   r;   r=   rX   r(   r  untyped_storager	  r$  	new_emptyset_storage_offsetr  r]   r  )rk  unwrapped_argstoragecpu_arg_to_cpustoragesr   r    r.    s*   


z,GridExecutor._init_args_hst.<locals>._to_cpuc                   s   g | ]} |qS r   r   )r  rk  )r.  r   r    r    r  z/GridExecutor._init_args_hst.<locals>.<listcomp>rw  )r   args_devrz  args_hst
kwargs_hstr-   r.   r   r-  r    _init_args_hst  s   zGridExecutor._init_args_hstc           
        st   i  fdd t ||D ]	\}} || q| D ]\}}|| }	 ||	 q D ]	\}}|| q.d S )Nc                   s   t | drt| t|} }|  | f|   < d S t| tr4t| |D ]	\} } | | q(d S t| trB | j|j d S d S )Nr	  )	r(   r  r&  r	  rv   r  zipr   r9   )arg_devarg_hst	_from_cpur/  r   r    r8    s   
 

z1GridExecutor._restore_args_dev.<locals>._from_cpu)r4  rx  r  copy_)
r   r0  r1  rz  r2  r5  r6  r-   	kwarg_dev	kwarg_hstr   r7  r    _restore_args_dev  s   zGridExecutor._restore_args_devc              
     s  t j  fdd| D }||\}}jD ]	}||i | qtj}zt jjg|R i |}fdd| D }tj	rN	|nj	}t
|dks[J d|ddt
|   }tj|  z,t|d D ]#}	t|d D ]}
t|d	 D ]}t|	|
| jd
i | qqyqqW n ty } ztjjjr tt||d }~ww W |  n|  w |||| d S )Nc                   s    i | ]\}}| j v r||qS r   )ry  rs  )argspecr   r    rv  6  s     z)GridExecutor.__call__.<locals>.<dictcomp>c                   s(   i | ]\}}|| j v r|nt|qS r   )r#  r  )r  r
  rk  r   r   r    rv  B  s   (    z#grid must have at most 3 dimensionsr  r   r   r   r   )r  getfullargspecr  rx  r3  r"  r  getcallargscallabler!  rE   r  r   rD   r   	Exceptionr  knobscompilationfront_end_debuggingr   r  rq  r<  )r   r0  rz  r1  r2  hookpatch_scopery  r!  r   r   r   er   )r=  r   r    __call__1  s>   




zGridExecutor.__call__N)r0   r1   r2   r?   r3  r<  rI  r   r   r   r    r    s
    
$r  c                   @  s   e Zd Zdd ZdS )ASTTransformerc                 C  sv   g }|j D ]
}|| |g7 }qt|dkrtdtjtjtjdt ddt d|j	tj
ddgg d	|_	|S )
Nr   z&Multiple assignments are not supportedinterpreter_semantic)idctxr  )r.   r   rM  F)r.   )funcry  keywords)targetsvisitrE   r   astCall	AttributeNameLoadr.   Constant)r   nodenamestargetr   r   r    visit_AssignZ  s   
zASTTransformer.visit_AssignN)r0   r1   r2   r[  r   r   r   r    rJ  X  s    rJ  c                   @  sJ   e Zd Ze Zdd Zdd Zdd Zdd Zd	d
 Z	dd Z
dd ZdS )FunctionRewriterc                 K  s   || _ || _d| _d| _d S )Nr  r   )r  rz  filenamedef_file_lineno)r   r  rz  r   r   r    r?   k  s   
zFunctionRewriter.__init__c                 C  sh   z
t | j\}}W n ty   | j Y S w |  \| _| _| || _| 	|}| 
|}| |S r"   )r  getsourcelinesr  rB  _get_jit_fn_file_liner]  r^  	_find_def
def_lineno_prepare_source_transform_ast_compile_and_exec)r   linesr  r,  transformed_astr   r   r    rewrite_astr  s   
	


zFunctionRewriter.rewrite_astc                 C  s   ddl m}m} ||| jS )Nr   )get_jit_fn_file_lineJITFunction)r  ri  rj  r  )r   ri  rj  r   r   r    r`    s   z&FunctionRewriter._get_jit_fn_file_linec                 C  s0   d}t |D ]\}}| dr|d }q|S )Nr   zdef r   )r  strip
startswith)r   rf  rb  r   liner   r   r    ra    s   zFunctionRewriter._find_defc                 C  s&   || j d d  }d|}t|S )Nr   r  )rb  jointextwrapdedent)r   rf  r,  r   r   r    rc    s   

z FunctionRewriter._prepare_sourcec                 C  s:   t |}| j|}t | | jd }t || |S r  )rR  parseast_transformerrQ  fix_missing_locationsr^  increment_lineno)r   r,  
parsed_astrg  
inc_linenor   r   r    rd    s   


zFunctionRewriter._transform_astc                 C  s^   t || jdd}i | j}| jj}t  D ]\}}||vr"|||< qt||| || jj S )Nexec)r]  mode)	compiler]  rz  r  r  globalsrx  rw  r0   )r   rg  compiled_codelocal_namespace
fn_globalsr-   r.   r   r   r    re    s   
z"FunctionRewriter._compile_and_execN)r0   r1   r2   rJ  rr  r?   rh  r`  ra  rc  rd  re  r   r   r   r    r\  h  s    r\  c                   @  sP   e Zd ZU i Zded< dddZdd Zd	d
 Zdd Ze	dd Z dd Z
dS )InterpretedFunctionzDict[Callable, Callable]rewritten_fnr   r   c                 K  sH   || _ t|fi || _|| _g | _t|}dd |j D | _	d S )Nc                 S  s   g | ]}|j qS r   r  r  r   r   r    r    ro  z0InterpretedFunction.__init__.<locals>.<listcomp>)
r  r\  rewriterrz  r"  r  	signature
parametersr  r   )r   r  rz  r  r   r   r    r?     s   
zInterpretedFunction.__init__c                O  s,   |rd S |   }t|| j|| j|i |S r"   )rewriter  r   r"  )r   r!  warmupry  rz  r  r   r   r    run  s   zInterpretedFunction.runc                 C  s   t |sJ | j| d S r"   )rA  r"  rl  )r   rF  r   r   r    add_pre_run_hook  s   z$InterpretedFunction.add_pre_run_hookc                 C  s*   | j | jvr| j | j| j < | j| j  S r"   )r  r  r  rh  r   r   r   r    r    s   zInterpretedFunction.rewritec                 C  s   | j jS r"   )r  r0   r   r   r   r    r0     s   zInterpretedFunction.__name__c              
   O  sJ   t | j |  }z||i |W S  ty$ } ztt||d }~ww r"   )r  r  r  rB  r   r  )r   ry  rz  r  rH  r   r   r    rI    s   
zInterpretedFunction.__call__Nr  )r0   r1   r2   r  r4   r?   r  r  r  r  rI  r   r   r   r    r~    s   
 
	
r~  )r  rg  )N
__future__r   rR  ro  r  typingr   r   r   r   r   r   numpyrA   r  triton.languagelanguagerw   r5   r   triton.language.semanticr	   triton.runtime.jitr
   triton.tools.tensor_descriptorr   errorsr   	functoolsr   _C.libtritonr   r#  r   r   r   r   r8   rT   ra   r   r   r   r   r   r   	vectorizer   r  r   r  rI   rb  r   r   rj  rk  rg  r~  r  r  r  r  r  r  r  r  r  r  r  rK  r  r  r  NodeTransformerrJ  r\  r~  r   r   r   r    <module>   sz     + @    %

	
"`
>
N
"pE