o
    Di:                     @   s  d dl mZmZmZmZ d dlZd dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZ d dlm	Z d dlmZ d d	lmZmZ d d
lmZ ddlmZ ddlmZ ddlmZ dZG dd dZ G dd dZ!dee"ej#f dej#fddZ$de"fddZ%G dd deZ&dS )    )DictOptionalUnionAnyN)ir)proton)amd)nvidia)passes)LazyDict)set_profile_allocatorNullAllocator)backends   )Hook   )flags)modec                   @   s.   e Zd Zdd Zdededee fddZdS )	CudaAllocatorc                 C   
   || _ d S N)instrumentation_hook)selfr    r   o/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/triton/profiler/hooks/instrumentation.py__init__      
zCudaAllocator.__init__size	alignmentstreamc                 C   sn   || j jkrtd| d| j j || d | | }t|| j j}dd l}|j|f|jdd}|| j _|S )NzAlignment mismatch: z != r   r   cudadtypedevice)	r   profile_buffer_alignmentRuntimeErrormaxprofile_buffer_sizetorchemptyuint8buffer)r   r   r   r   aligned_sizer(   r+   r   r   r   __call__   s   zCudaAllocator.__call__N)__name__
__module____qualname__r   intr   r-   r   r   r   r   r      s    r   c                   @   sF   e Zd Zdeeef fddZdefddZdefddZd	d
 Z	dS )Instrumentationir_mapc                 C   r   r   )manager)r   r3   r   r   r   r   /   r   zInstrumentation.__init__r   c                 C   s&   || j v rtd| || j |< d S )NzIR already registered: )r4   r%   )r   r   funcr   r   r   register2   s   
zInstrumentation.registerc                 C   s*   |  | || jv r| j| | d S d S r   )load_dialectsr4   )r   r   pmcontextr   r   r   patch7   s   

zInstrumentation.patchc                 C   s   t | d S r   )triton_protonr7   )r   ctxr   r   r   r7   <   s   zInstrumentation.load_dialectsN)
r.   r/   r0   r   strr   r   r6   r:   r7   r   r   r   r   r2   -   s
    r2   mode_objreturnc           
         s  t | tjr| S | sd} | d}|d }i }|dd  D ]}d|v r0|dd\}}|||< qtd| d|dd	|d
d|ddt|dd|dd|dd|dd|ddd  fdd}|dtj d< |d
tj d
< |dtj	 d< |dtj
 d< |dtj d< t d dkrdd  d dD ng }|D ]}	|	tjvrtd|	 qdd |D  d< |dkrtjd!i  S |dkrtjd!i  S td |  )"Ndefault:r   r   =z#Malformed instrumentation option: ''metric_typecyclebuffer_typesharedbuffer_strategycircularbuffer_size0granularitywarpsampling_strategynonesampling_options optimizations)rD   rF   rH   rJ   rL   rN   rP   rR   c                    s8    |  }|r||vrt d|  d| |r|| S |S )NzUnknown z: )
ValueError)opt_namemappingvalueoptionsr   r   get_option_valueY   s   z)_interpret_mode.<locals>.get_option_valuec                 S   s   g | ]}|  qS r   )strip.0rV   r   r   r   
<listcomp>f   s    z#_interpret_mode.<locals>.<listcomp>,zUnknown optimization: c                 S   s   g | ]}t j| qS r   )r   rR   r[   r   r   r   r]   k   s    mmazUnknown mode: r   )
isinstancer   InstrumentationModesplitrS   getr1   metric_typesbuffer_typesbuffer_strategiesgranularitiessampling_strategieslenrR   DefaultMMA)
r>   parts	mode_nameoptsoptkeyvalrY   valuesrV   r   rW   r   _interpret_mode@   sN   


rs   c                  C   s6   t jjj j} | dkrdS | dkrdS td|  )Nr    r	   hipr   zUnsupported backend: )tritonruntimedriveractiveget_current_targetbackendr%   )rz   r   r   r   _get_backend_namev   s   r{   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZ	e
e ed< dZeed	< d
Zeed< dedeejf fddZdd Zdd Zdedededeeef deddfddZdefddZdeddfddZdeddfd d!Zdeddfd"d#ZdS )$InstrumentationHookr   priorityactive_countFenable_host_bufferNhost_bufferr   r'      r$   r>   c                 C   s$   t || _t| | _d | _i | _d S r   )rs   r   r   	allocatorr+   metadata_path)r   r>   r   r   r   r      s   


zInstrumentationHook.__init__c                    s   t jdkr	tdt  jd7  _dt_tjjj	 tjjjj
d t   fdd fdd	tfd
dfdddt  j_tj tjtjj_d S )Nr   zFOnly one instance of the instrumentation hook can be active at a time.r   Tmax_shared_memc                    s   t jjj jv r
dnd}t| j jj jj jj j	j j
j jj jjj| tj|  t jjj jv r?t|  t|  t jjj jv rW dkrYt|  d S d S d S )NFTr   )r   OptimizeCLOCK32rR   r;   add_convert_proton_to_protongpurD   rN   rP   rL   rH   rF   rJ   r'   r$   triton_passescommonadd_cseSCHED_STORESadd_schedule_buffer_store!add_allocate_proton_shared_memorySCHED_BARRIERSadd_sched_barriers)r8   is_long_clk)backend_namer   r   r   r   to_llvmir_passes   s   

z6InstrumentationHook.activate.<locals>.to_llvmir_passesc                    s\   t |   dkrt |  d S  dkr,tjjjjd 	dd }t 
| | d S d S )Nr	   r   archrA   r   )r;   )add_allocate_proton_global_scratch_buffer%add_convert_proton_nvidia_gpu_to_llvmru   rv   rw   rx   utilsget_device_propertiesrb   "add_convert_proton_amd_gpu_to_llvm)r8   r   )r   r#   r   r   to_llvm_passes   s   
 z4InstrumentationHook.activate.<locals>.to_llvm_passesc                        | S r   r   r8   )r   r   r   <lambda>       z.InstrumentationHook.activate.<locals>.<lambda>c                    r   r   r   r   )r   r   r   r      r   )ttgpuir_to_llvmirllvmir_to_llvm)r|   r~   r%   r   instrumentation_onru   rv   rw   rx   get_current_devicer   r   r{   r2   r   compilerinstrumentationr   r   r=   r   knobscompilationinstrumentation_moder   r   )r   r#   r   r   r   r   r   activate   s   



zInstrumentationHook.activatec                 C   sX   t jdkrd S t  jd8  _t }i t| j_dt_dtj	j
_tt  d t _d | _d S )Nr   r   FrQ   )r|   r~   r{   r   r   r   r   r   ru   r   r   r   r   r   r   r+   )r   r   r   r   r   
deactivate   s   



zInstrumentationHook.deactivatemodulefunctionnamemetadata_grouphashr?   c                 C   s   |sd S t dd | D d }t dd | D d }|| j|< |ret }t| t }	|	dkr9t| n	|	dkrBt| t	| t
||}||_t	|}
t	|}t|||
|| d S td| )Nc                 s   "    | ]\}}| d r|V  qdS )ttgirNendswithr\   rp   pathr   r   r   	<genexpr>        z2InstrumentationHook.init_handle.<locals>.<genexpr>c                 s   r   )jsonNr   r   r   r   r   r      r   r	   r   z+IR path not found in metadata for function )nextitemsr   	triton_irr9   r7   r{   triton_nvidia
triton_amdr;   parse_mlir_moduleget_scope_id_namesget_scope_id_parents	libprotoninit_function_metadatar%   )r   r   r   r   r   r   ir_pathr   r9   r   scope_id_namesscope_id_parentsr   r   r   init_handle   s(   





zInstrumentationHook.init_handlec                 C   s   | j d u rdS | j  S )Nr   )r+   data_ptrr   r   r   r   	_data_ptr   s   zInstrumentationHook._data_ptrmetadatac                 C   sb   |j d}|j d}| jd u rdn	| j | j  }t|||  | tj	r/d t_
d S d S Nr   r   r   )datarc   r+   element_sizenumelr   enter_instrumented_opr   r|   r   r   r   r   r5   r   
alloc_sizer   r   r   enter   s   "
zInstrumentationHook.enterc                 C   sf   |j d}|j d}| jd u rdn	| j | j  }t|||  | tj	r1| 
| d S d S r   )r   rc   r+   r   r   r   exit_instrumented_opr   r|   r   _populate_host_bufferr   r   r   r   exit  s   "zInstrumentationHook.exitc              
   C   s  |r| j | rdd l}dd l}dd l}dtttf dtfdd}| jd u r'dn	| j	 | j
  }| jj d}i }t| j | d}	||	}W d    n1 sTw   Y  ||d }
|d }|d	 }| jjtjjkrq|nt|}t|| }| jjd
ko| jjtjjk}|rdd t|D }ndd |D }d|d  }d}|}|}t|||||
||||g
|}|jdt| g|R  }|j|| |jddt_tjd | }| |j!t"||jd tj|d  #| j}| | j$  d S d S d S )Nr   targetr?   c                 S   s$   | d dkrdS | d dkrdS dS )Nrz   r    r   rt   r   r   r   )r   r   r   r   encode_target  s
   z@InstrumentationHook._populate_host_buffer.<locals>.encode_targetr^   rprofile_scratch_size	num_warpsrQ   c                 S   s   g | ]}|qS r   r   r\   ir   r   r   r]   J  s    z=InstrumentationHook._populate_host_buffer.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )r1   r   r   r   r   r]   L  s    (      Icpur!   )r"   )%r   r(   structr   r   r=   r   r1   r+   r   r   r   rP   rZ   rb   openloadrN   r;   SAMPLING_STRATEGYNONEri   rL   GRANULARITYWARPrangeVERSIONpackr)   r*   r|   r   copy_tensorlistview_asr   )r   r   r(   r   r   r   r   sampled_warpsr   filedevice_typescratch_mem_size
total_unituid_num	block_numis_all_warpsuid_vecheader_sizeheader_offsetpayload_offsetpayload_sizeheader_valuesheader_bytesconfig_portiondata_portionr   r   r   r     sL   "!z)InstrumentationHook._populate_host_buffer)r.   r/   r0   r}   r1   __annotations__r~   r   boolr   r   r   r'   r$   r   r=   r   ra   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r|      s   
 3*	r|   )'typingr   r   r   r   ru   triton._C.libtritonr   r   r   r;   r   r   r	   r   r
   r   triton._C.libprotonr   triton.compilerr   triton.runtime._allocationr   r   triton.backendsr   hookr   r   rQ   r   r   r   r2   r=   ra   rs   r{   r|   r   r   r   r   <module>   s(    6
