o
    eix                     @   s,   d dl Z d dlmZ dgZG dd dZdS )    N)dtype	GPULimitsc                   @   sz   e Zd ZdZdejfddZdedefddZ	dedefd	d
Z
	ddededefddZdefddZdefddZdS )r   a  Utility class that provides the theoretical limits of Nvidia GPU devices. The
    limits don't take into account thermal throttling (assume that the GPU run at its
    peak rated frequency). This is because user hardware configuration may influence
    power behavior.
    target_devicec                 C   s,   t j|| _t| jj | jj | _d S )N)torchcudaget_device_propertiesdevice_propertiesintmajorminorcompute_capability)selfr    r   c/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/cuda/_device_limits.py__init__   s   
zGPULimits.__init__	data_typereturnc              
   C   s   dddddddddd	}d}|t ju rd| j }n|t ju r&d| j }n|t ju r2d	| j }nd
}||vrDtd| j d| d|| S )N   @          )	fp16_80fp32_80fp64_80fp16_90fp32_90fp64_90fp16_100fp32_100fp64_100 fp16_fp32_fp64_unknownNo data for sm_ and .)r   float16r   float32float64RuntimeErrorr   r   hardcoded_device_valuesdict_keyr   r   r   #get_fma_per_cycle_per_sm_cuda_cores   s.   


z-GPULimits.get_fma_per_cycle_per_sm_cuda_coresc                 C   s   dddddddddddddd}d	}|t ju rd
| j }n2|t ju r*d
| j }n&|t ju r6d| j }n|t ju rBd| j }n|t ju rNd| j }nd}||vr`td| j d| d|| S )Ni   i   i   r   i   r   i    )int8_80r   r   r   int8_90fp8_90r   r   r   int8_100fp8_100r   r   r    r!   r"   int8_r#   r$   r%   r&   r'   )r   r(   r   bfloat16r)   int8r*   r+   r,   r   r   r   %get_fma_per_cycle_per_sm_tensor_coresA   s>   




z/GPULimits.get_fma_per_cycle_per_sm_tensor_coresTuse_tensor_coresc                 C   sF   | j j}| j j}d}|r| |}n| |}|| d | d }|S )Nr      g    eA)r   multi_processor_count
clock_rater8   r/   )r   r   r9   num_smsr<   fma_per_cycletflops_per_secondr   r   r   get_tflops_per_secondj   s   
zGPULimits.get_tflops_per_secondc                 C   s,   t d| jj d }| jjd }|| }|S )Nr:        )r	   r   memory_bus_widthmemory_clock_rate)r   bus_bytes_per_cyclemem_clock_rate_Hzbytes_per_secondr   r   r   get_memory_bandwidth_Bps{   s   z"GPULimits.get_memory_bandwidth_Bpsc                 C   s(   | j j}d}|| }|| j j d }|S )Nr   rB   )r   r;   r<   )r   r=   bytes_per_cycle_per_smbytes_per_cycle_per_devicerG   r   r   r   get_shared_memory_bandwidth_Bps   s   z)GPULimits.get_shared_memory_bandwidth_BpsN)T)__name__
__module____qualname____doc__r   devicer   r   r	   r/   r8   boolfloatr@   rH   rK   r   r   r   r   r      s    "*
)r   torch._Cr   __all__r   r   r   r   r   <module>   s    