o
    kig                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZd dl	m
Z
mZ d dlmZ ddlmZmZmZmZ e seedejjd< edejjd< ed	ejjd	< ed
ejjd
< edejjd< dNddZdOdeddfddZdOdeddfddZdOdedeeef fddZdOdedeeef fddZdOdedefddZdOdedefddZdOdedefddZdOdedefddZ dOdede!eef fd d!Z"dOdede#fd"d#Z$dOd$e#deddfd%d&Z%	dOd'e!eef dB de&eeef  fd(d)Z'dPded+e(fd,d-Z)	*dQd/ed+e(ddfd0d1Z*d2d2d2ej+d*dfd3ed4 dB d5ed6 dB d7ed8 d9ed:e(d;e&e dB ddfd<d=Z,G d>d? d?Z-G d@dA dAe-Z.dBe-ddfdCdDZ/de-fdEdFZ0G dGdH dHejj1Z2ej3dOdIe2ddJfdKdLZ4g dMZ5dS )R    N)AnyLiteral)%_augment_memory_snapshot_stack_traces_dummy_type)Device   )_get_device_index_is_compiled
_lazy_initis_initialized_xpu_XPUAllocator_XPUMemPool%_xpu_beginAllocateCurrentThreadToPool_xpu_endAllocateToPool_xpu_releasePoolreturnc                   C   s   t  r
tj  dS dS )aZ  Release all unoccupied cached memory currently held by the caching
    allocator so that those can be used in other XPU application.

    .. note::
        :func:`~torch.xpu.empty_cache` doesn't increase the amount of XPU
        memory available for PyTorch. However, it may help reduce fragmentation
        of XPU memory in certain cases.
    N)r   torch_C_xpu_emptyCache r   r   Z/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/xpu/memory.pyempty_cache   s   	r   devicec                 C      t | dd} tj| S )a  Reset the "peak" stats tracked by the XPU memory allocator.

    See :func:`~torch.xpu.memory_stats` for details. Peak stats correspond to the
    `"peak"` key in each individual stat dict.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    Toptional)r   r   r   _xpu_resetPeakMemoryStatsr   r   r   r   reset_peak_memory_stats'      r   c                 C   r   )a  Reset the "accumulated" (historical) stats tracked by the XPU memory allocator.

    See :func:`~torch.xpu.memory_stats` for details. Accumulated stats correspond to
    the `"allocated"` and `"freed"` keys in each individual stat dict.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    Tr   )r   r   r    _xpu_resetAccumulatedMemoryStatsr   r   r   r   reset_accumulated_memory_stats6   r   r!   c                 C   s"   t  si S t| dd} tj| S )zLReturn the result of :func:`~torch.xpu.memory_stats` as a nested dictionary.Tr   )r   r   r   r   _xpu_memoryStatsr   r   r   r   memory_stats_as_nested_dictE   s   r#   c                    sF   g dt dtddf fdd t| d} d|   tS )	a@  Return a dictionary of XPU memory allocator statistics for a given device.

    The return value of this function is a dictionary of statistics, each of
    which is a non-negative integer.

    Core statistics:

    - ``"allocated_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of allocated memory.
    - ``"reserved_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of reserved memory.
    - ``"active_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      amount of active memory.
    - ``"requested_bytes.{all,large_pool,small_pool}.{current,peak,allocated,freed}"``:
      memory requested by client code, compare this with allocated_bytes to check if
      allocation rounding adds too much overhead.

    For these core statistics, values are broken down as follows.

    Pool type:

    - ``all``: combined statistics across all memory pools.
    - ``large_pool``: statistics for the large allocation pool (for size >= 1MB allocations).
    - ``small_pool``: statistics for the small allocation pool (for size < 1MB allocations).

    Metric type:

    - ``current``: current value of this metric.
    - ``peak``: maximum value of this metric.
    - ``allocated``: historical total increase in this metric.
    - ``freed``: historical total decrease in this metric.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistics for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    prefixobjr   Nc                    sT   t |tr!t| dkr| d7 } | D ]\}} | | | qd S | |f d S )Nr   .)
isinstancedictlenitemsappend)r$   r%   kv_recurse_add_to_resultresultr   r   r/   u   s   
z,memory_stats.<locals>._recurse_add_to_resultr    )strr   r#   sortcollectionsOrderedDict)r   statsr   r.   r   memory_statsM   s   &
	

r7   c                 C      t | dddS )a  Return the current GPU memory occupied by tensors in bytes for a given device.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This is likely less than the amount shown in `xpu-smi` since some
        unused memory can be held by the caching allocator and some context
        needs to be created on GPU.
    r   zallocated_bytes.all.currentr   r7   getr   r   r   r   memory_allocated   s   r;   c                 C   r8   )a  Return the maximum GPU memory occupied by tensors in bytes for a given device.

    By default, this returns the peak allocated memory since the beginning of
    this program. :func:`~torch.xpu.reset_peak_memory_stats` can be used to
    reset the starting point in tracking this metric. For example, these two
    functions can measure the peak allocated memory usage of each iteration in a
    training loop.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    r   zallocated_bytes.all.peakr   r9   r   r   r   r   max_memory_allocated      r<   c                 C   r8   )aJ  Return the current GPU memory managed by the caching allocator in bytes for a given device.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    r   zreserved_bytes.all.currentr   r9   r   r   r   r   memory_reserved   s   r>   c                 C   r8   )a  Return the maximum GPU memory managed by the caching allocator in bytes for a given device.

    By default, this returns the peak cached memory since the beginning of this
    program. :func:`~torch.xpu.reset_peak_memory_stats` can be used to reset
    the starting point in tracking this metric. For example, these two functions
    can measure the peak cached memory amount of each iteration in a training
    loop.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).
    r   zreserved_bytes.all.peakr   r9   r   r   r   r   max_memory_reserved   r=   r?   c                 C      t   t| dd} tj| S )aP  Return the global free and total GPU memory for a given device.

    Args:
        device (torch.device or int or str, optional): selected device. Returns
            statistic for the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).

    Returns:
        tuple[int, int]: a tuple of two integers (free_memory, total_memory) in bytes.
            The first value is the free memory on the device (available across all processes and applications),
            The second value is the device's total hardware memory capacity.
    Tr   )r
   r   r   r   _xpu_getMemoryInfor   r   r   r   mem_get_info   s   rB   c                 C   r@   )ab  
    Retrieve the memory fraction currently set for a process on a given XPU device.
    This fraction represents the portion of the total device memory that
    the caching allocator is allowed to use. The allowed memory is calculated as:

    .. math:: \text{allowed\_memory} = \text{total\_memory} \times \text{fraction}

    Args:
        device (torch.device or int or str, optional): selected device. It uses the current device,
            given by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None`` (default).

    Returns:
        float: The memory fraction in the range 0.0 to 1.0.
    Tr   )r
   r   r   r   _xpu_getMemoryFractionr   r   r   r   get_per_process_memory_fraction   s   rD   fractionc                 C   s6   t   t|dd}t| tstdtj| | dS )a=  
    Set the memory fraction for a single process on XPU device.
    This function limits the amount of memory that the caching allocator can allocate
    on the specified XPU device. The allowed memory is computed as:

    .. math:: \text{allowed\_memory} = \text{total\_memory} \times \text{fraction}

    If the process attempts to allocate more than this allowed memory,
    an out-of-memory error will be raised by the allocator.

    Arguments:
        fraction (float): Range: 0~1. Allowed memory equals total_memory * fraction.
        device (torch.device or int or str, optional): selected device. It uses the current device,
            given by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None`` (default).

    .. note:: In general, the total available free memory is less than the total capacity.
    Tr   z3Invalid type for fraction argument, must be `float`N)r
   r   r'   float	TypeErrorr   r   _xpu_setMemoryFraction)rE   r   r   r   r   set_per_process_memory_fraction   s
   
rI   
mempool_idc                 C   s   t  sg S tj| d S )a  
    Return a snapshot of the XPU memory allocator state across all devices.
    Provides detailed information for each memory segment managed by the allocator
    including its size, owning pool, associated stream, call stack traces, and other relevant attributes.

    Arguments:
        mempool_id (tuple[int, int] or None, optional): The memory pool id. If None, the default memory pool is used.

    Returns:
        list[dict[str, Any]]: List of memory segments and their attributes.
    segments)r   r   r   _xpu_memorySnapshot)rJ   r   r   r   memory_snapshot  s   rM   Faugment_with_fx_tracesc                 C   s   t jd}|rt|}|S )a  
    Capture a snapshot of the XPU memory state at the time this function is called.

    The returned snapshot is a dictionary with the following structure.

    .. code-block:: python

        class Snapshot(TypedDict):
            segments: List[Segment]
            device_traces: List[List[TraceEntry]]


        class Segment(TypedDict):
            # A Segment represents a contiguous memory region returned by the SYCL runtime.
            #
            # All reserved memory is composed of these segments. Segments are
            # cached and reused by the allocator. When allocations are smaller
            # than the segment, the segment may be split into multiple Blocks.
            #
            # Calling :func:`~torch.xpu.memory.empty_cache` releases segments that are entirely inactive.
            address: int
            total_size: int  #  total size of segment
            stream: int
            segment_type: Literal["small", "large"]  # 'large' (>1MB)
            allocated_size: int  # size of memory in use
            active_size: int  # size of memory in use or in active_awaiting_free state
            blocks: List[Block]


        class Block(TypedDict):
            # A sub-region of a Segment, either currently allocated or cached for reuse.
            size: int
            requested_size: int  # Original requested size (may be smaller than `size`)
            address: int
            state: Literal[
                "active_allocated",  # used by a tensor
                "active_awaiting_free",  # waiting for another stream synchronization, then become free
                "inactive",  # free for reuse
            ]
            frames: List[Frame]  # stack trace from where the allocation occurred


        class Frame(TypedDict):
            filename: str
            line: int
            name: str
            # Optional fields when `augment_with_fx_traces=True` and the frame
            # corresponds to FX-generated code.
            fx_node_op: str  # FX node operation type (e.g., 'call_function', 'output')
            fx_node_name: str  # FX node name (e.g., 'linear', 'relu_1')
            fx_original_trace: str  # Original model source code stack trace


        class TraceEntry(TypedDict):
            # Trace entries are recorded only when :func:`~torch.xpu.memory._record_memory_history` is enabled.
            action: Literal[
                "alloc"  # memory allocated
                "free_requested",  # received a call to free memory
                "free_completed",  # memory reclaimed and reusable
                "segment_alloc",  # ask SYCL runtime for more memory
                "segment_free",  # called SYCL runtime to return memory to XPU
                "segment_map",  # ask SYCL runtime to map memory
                "segment_unmap",  # called SYCL runtime to unmap memory
                "snapshot",  # snapshot taken
                "oom",  # threw an OOM exception
            ]
            addr: int  # not present for OOM
            frames: List[Frame]
            size: int
            stream: int
            device_free: int  # only present for OOM, the amount of free memory reported by the device

    Arguments:
        device (torch.device or int or str, optional): selected device. It uses the current device,
            given by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None`` (default).
        augment_with_fx_traces (bool, optional): If True, augment stack trace frames with FX debug information
            that maps generated FX code back to original model source code. This adds the FX-related
            fields (fx_node_op, fx_node_name, fx_original_trace) to Frame objects. Default is ``False``.

    Returns:
        The Snapshot dictionary object
    N)r   r   rL   r   )r   rN   sr   r   r   	_snapshot  s   TrP   dump_snapshot.picklefilenamec                 C   sF   t |d}t| d}t|| W d   dS 1 sw   Y  dS )a  
    Save a pickled version of the `torch.memory._snapshot()` dictionary to a file.

    This file can be opened by the interactive snapshot viewer at pytorch.org/memory_viz

    Snapshot file sizes scale with `max_entries` and stack trace depth per entry,
    with several KB per entry. These can easily be in the GB range for longer running
    workflows with large `max_entries`.

    Arguments:
        filename (str, optional): Name of the file to create. Defaults to "dump_snapshot.pickle".
        augment_with_fx_traces (bool, optional): If True, augment the snapshot with FX debug information
            before dumping. This maps generated FX code stack traces back to original model
            source code. Defaults to ``False``.
    )rN   wbN)rP   openpickledump)rR   rN   rO   fr   r   r   _dump_snapshotp  s   
"rX   allenabled)staterY   context)r[   allocrY   stacks)pythonrY   max_entriesclear_historyskip_actionsc              	   C   s,   t j| |||||dur| dS g  dS )a  
    Enable recording of stack traces associated with memory allocations, so you can
    tell what allocated any piece of memory in :func:`~torch.xpu.memory._snapshot()`.

    In addition to keeping stack traces with each current allocation and free,
    this will also enable recording of a history of all alloc/free events.

    Use :func:`~torch.xpu.memory._snapshot()` to retrieve this information,
    and the tools in `_memory_viz.py` to visualize snapshots.

    Buffer behavior
    ---------------

    This will store up to `max_entries` instances of `TraceEntry` when enabled.
    Python trace collection defaults to `sys.maxsize`, meaning long-running
    or indefinitely running jobs should set a reasonable limit to avoid excessive
    memory use. Expect each entry to be several KB.

    Longer running workflows or those with smaller `max_entries` values will only
    store the last accumulated `max_entries` entries, meaning new entries overwrite
    older entries, reference to ring buffer behavior.

    Latency impact
    --------------

    The Python trace collection is fast (2us per trace), so you may consider
    enabling this on production jobs if you anticipate ever having to debug
    memory issues.

    C++ trace collection is also fast (~50ns/frame), which for many typical programs
    works out to ~2us per trace, but can vary depending on stack depth.

    Arguments:
        enabled (Literal["state", "all"], optional):
            `None`, disable recording memory history.
            `"state"`, keep information for currently allocated memory.
            `"all"`, additionally keep a history of all alloc/free calls.
            Defaults to "all".
        context (Literal["state", "alloc", "all"], optional):
            `None`, Do not record any tracebacks.
            `"state"`, Record tracebacks for currently allocated memory.
            `"alloc"`, additionally keep tracebacks for alloc calls.
            `"all"`, additionally keep tracebacks for free calls.
            Defaults to "all".
        stacks (Literal["python", "all"], optional):
            `"python"`, include Python, TorchScript, and inductor frames in tracebacks.
            `"all"`, additionally include C++ frames.
            Defaults to "all".
        max_entries (int, optional): Keep a maximum of `max_entries`
            alloc/free events in the recorded history recorded.
        clear_history (bool, optional): Clear history when enabling, defaults to ``False``.
        skip_actions (list[str], optional): List of action types to skip when recording
            memory history. This can be used to reduce memory overhead by excluding
            certain types of events from being recorded. Valid action types are:

            - `"alloc"`: Memory allocation events
            - `"free_requested"`: Free requests (memory marked for freeing)
            - `"free_completed"`: Completed free operations (memory actually freed)
            - `"segment_alloc"`: Segment allocation from SYCL runtime
            - `"segment_free"`: Segment freed back to XPU via SYCL runtime
            - `"segment_map"`: Segment map events
            - `"segment_unmap"`: Segment unmap events
            - `"snapshot"`: Memory snapshot generation events
            - `"oom"`: Out-of-memory exceptions

            For example, to skip recording free_requested events:
            `skip_actions=["free_requested"]`

            Defaults to ``None`` (record all actions).
    N)r   r   _xpu_recordMemoryHistory)rZ   r\   r^   r`   ra   rb   r   r   r   _record_memory_history  s   O
rd   c                   @   s*   e Zd ZdZdejjfddZdd ZdS )_XPUAllocatorz,Wrapper over internal XPU memory allocators.	allocatorc                 C   s
   || _ d S N
_allocator)selfrf   r   r   r   __init__  s   
z_XPUAllocator.__init__c                 C   s   | j S rg   rh   rj   r   r   r   rf     s   z_XPUAllocator.allocatorN)	__name__
__module____qualname____doc__r   r   r   rk   rf   r   r   r   r   re     s    re   c                   @   s&   e Zd ZdZdededefddZdS )XPUPluggableAllocatora  
    XPU memory allocator loaded dynamically from a shared library.

    This lets users provide custom allocation and free functions implemented
    in a separate shared library. The allocator is registered and could become
    available for use via :func:`~torch.xpu.memory.change_current_allocator`.

    Arguments:
        path_to_lib_file (str):
            Filesystem path to the shared library file containing the allocation
            and free functions.
        alloc_fn_name (str):
            Name of the allocation function exported from the shared library.
            The function must have the signature:

                ``void* alloc_fn(size_t size, int device, sycl::queue* queue);``

        free_fn_name (str):
            Name of the free function exported from the shared library.
            The function must have the signature:

                ``void free_fn(void* ptr, size_t size, int device, sycl::queue* queue);``
    path_to_lib_filealloc_fn_namefree_fn_namec           	      C   sj   t |}t||}t||}t |t jj}t |t jj}|d u s'|d u r+tdtj	||| _
d S )Nz9Failed to load allocator symbols from the shared library.)ctypesCDLLgetattrcastc_void_pvalueRuntimeErrorr   r   _xpu_customAllocatorri   )	rj   rr   rs   rt   allocator_liballoc_fn_ptrfree_fn_ptralloc_fn_addrfree_fn_addrr   r   r   rk     s   


zXPUPluggableAllocator.__init__N)rm   rn   ro   rp   r2   rk   r   r   r   r   rq     s    rq   rf   c                 C   s   t j|   dS )a  Change the currently used memory allocator to be the one provided.

    .. note::
        If the current allocator has already been used/initialized, this function will error.

    Arguments:
        allocator (torch.xpu.memory._XPUAllocator): allocator to be set as the active one.
    N)r   r   _xpu_changeCurrentAllocatorrf   )rf   r   r   r   change_current_allocator  s   
r   c                   C   s   t tj S )zxReturn the allocator being currently used.

    Returns:
        _XPUAllocator: the allocator being currently used.
    )re   r   r   _xpu_getAllocatorr   r   r   r   _get_current_allocator$  s   r   c                       s   e Zd ZdZ		ddejjdB def fddZe	de
eef f fd	d
Ze	dejjdB f fddZdef fddZdd Z  ZS )MemPoola  MemPool represents a pool of memory in a caching allocator. Currently,
    it's just the ID of the pool object maintained in the XPUCachingAllocator.

    Args:
        allocator(torch._C._xpu_XPUAllocator, optional): a
            torch._C._xpu_XPUAllocator object that can be used to
            define how memory gets allocated in the pool. If :attr:`allocator`
            is ``None`` (default), memory allocation follows the default/
            current configuration of the XPUCachingAllocator.
        use_on_oom(bool): a bool that indicates if this pool can be used
            as a last resort if a memory allocation outside of the pool fails due
            to Out Of Memory. This is ``False`` by default.
    NFrf   
use_on_oomc                    s   t  |d| d S )NT)superrk   )rj   rf   r   	__class__r   r   rk   =  s   zMemPool.__init__r   c                       t  jS )z3Returns the ID of this pool as a tuple of two ints.)r   idrl   r   r   r   r   D     z
MemPool.idc                    r   )z9Returns the allocator this MemPool routes allocations to.)r   rf   rl   r   r   r   rf   I  r   zMemPool.allocatorc                    s
   t   S )z)Returns the reference count of this pool.)r   	use_countrl   r   r   r   r   N  s   
zMemPool.use_countc                 C   s   t j| j}|S )zReturn a snapshot of the XPU memory allocator pool state across all
        devices.

        Interpreting the output of this function requires familiarity with the
        memory allocator internals.
        )r   xpurM   r   )rj   snapshotr   r   r   r   R  s   zMemPool.snapshotNF)rm   rn   ro   rp   r   r   r   boolrk   propertytupleintr   rf   r   r   __classcell__r   r   r   r   r   .  s    
r   poolr   c              
   c   s|    |du r
t j nt|}t j|| j zdV  W t j|| j t j|| j dS t j|| j t j|| j w )a  A context manager that routes allocations to a given pool.

    Args:
        pool(torch.xpu.MemPool): a :class:`MemPool` object to be made active so that
            allocations route to this pool.
        device (torch.device or int, optional): selected device. Uses :class:`MemPool on
            the current device, given by :func:`~torch.xpu.current_device`,
            if :attr:`device` is ``None`` (default).

    .. note::
        This context manager makes only current thread's allocations route to
        the given pool. If a new thread is spawned inside the context manager
        (e.g. by calling backward) the allocations in that thread will not
        route to the given pool.
    N)	r   r   current_devicer   r   r   r   r   r   )r   r   device_indexr   r   r   use_mem_pool]  s   r   )r   rq   r   r   rD   r<   r?   rB   r;   r>   rM   r7   r#   r!   r   rI   r   )r   Nrg   r   )rQ   F)6r4   
contextlibru   rU   systypingr   r   r   torch._utilsr   r   torch.typesr   r1   r   r	   r
   r   r   __dict__r   r   r!   r(   r2   r#   r7   r   r;   r<   r>   r?   r   rB   rF   rD   rI   listrM   r   rP   rX   maxsizerd   re   rq   r   r   r   r   contextmanagerr   __all__r   r   r   r   <module>   s    
8
[




Y+
/