o
    kiM                  
   @   s  d dl Z d dlmZmZ d dlmZmZmZ d dlm	Z	 ddl
mZ e jjZe	e je je je jgZe	g ejejejejejejejejejejejejejej ej!ej"ej#ej$ej%ej&ej'ej(ej)ej*ej+ej,ej-ej.ej/ej0ej1Z2e	ej3ej4ej5ej6ej7ej8ej9ej:ej;g	Z<e2e<B Z=de>fdd	Z?d
e j@deAfddZBde>fddZCdS )    N)get_device_tflopsget_gpu_dram_gbps)has_hint	size_hintstatically_known_true)
OrderedSet   )flop_registryreturnc                 C   s~   | t v r=t|dkrtd| d|  | }t|d }d}|| }t |  }	|	|i |d|id }
|
| d }|S d	S )
aw  
    Estimates the compute time of an aten operator.

    Args:
        func_packet: The operator overload packet.
        args: The arguments to the operator.
        kwargs: The keyword arguments to the operator.
        out: The output of the operator.
        out_dtypes: The output data types.

    Returns:
        float: The estimated compute time in nanoseconds.
    r   z"Only support single out dtype got z for g  4&kCg      ?out_val   g    eAg        )r	   lenAssertionErrorpopr   )func_packetargskwargsout
out_dtypesdtypepeak_gpu_flopsfactorpeak_empirical_flopsflop_count_func
flop_countcompute_time r   i/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/utils/_runtime_estimation.pyget_compute_timeN   s   r   tc                 C   sX   d}t | j|  D ]\}}t|rt|s dS t|dks%|t|9 }q
||   S )z
    Calculates the memory consumption of a tensor.

    Args:
        t (torch.Tensor): The input tensor.

    Returns:
        int: The memory consumption of the tensor in bytes.
    r   r   )zipshapestrider   r   r   element_size)r   
real_numelsizer"   r   r   r   get_num_bytesp   s   
r&   c                 C   s>   t  }tdd | D }tdd |D }|| }|| }|S )aG  
    Estimates the memory transfer time of input and output tensors.

    Args:
        flat_args_kwargs (List[torch.Tensor]): The flat list of arguments and keyword arguments.
        flat_outs (List[torch.Tensor]): The flat list of outputs.

    Returns:
        float: The estimated memory transfer time in nanoseconds.
    c                 s   $    | ]}t |tjrt|V  qd S N
isinstancetorchTensorr&   .0r   r   r   r   	<genexpr>       
z$get_transfer_time.<locals>.<genexpr>c                 s   r'   r(   r)   r-   r   r   r   r/      r0   )r   sum)flat_args_kwargs	flat_outsgpu_memory_bandwidth
read_byteswrite_bytescounted_bytestransfer_timer   r   r   get_transfer_time   s   r9   )Dr+   torch._inductor.utilsr   r   %torch.fx.experimental.symbolic_shapesr   r   r   torch.utils._ordered_setr   flop_counterr	   opsatenfloat16bfloat16float32float64_FLOAT_TYPES
lift_freshr   	transposeviewdetach_unsafe_viewsplitadjoint
as_strideddiagonalexpand	expand_asmovedimpermuteselectsqueezemTmHrealimagview_as	unflattenunfoldunbind	unsqueezevsplithsplitsplit_with_sizesswapaxesswapdimschunk	_VIEW_OPSrandintrandnrand
randn_like	rand_likerandint_likearange	ones_like
zeros_like_CREATE_OPS_IGNORE_OPSfloatr   r,   intr&   r9   r   r   r   r   <module>   s    
	
$"