o
    eiFZ                     @   s
  d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	 d dl
mZmZ d dlmZ g dZdejdeejgejdB f fdd	Zdejd
eejeeef f dedeejgejdB f fddZdejdeeej  deej deeef d
eejeeef f f
ddZdefddZG dd deZdejdededeeej  def
ddZG dd deZG dd  d eZdejdeded!edef
d"d#Zdejdeded$eeej  def
d%d&Z dejde	e defd'd(Z!dejdededefd)d*Z"ed+ddfdejdeded,ed-eeej  dB d.eeej  dB defd/d0Z#ej$ej%he#_&ej'he#_(e j)d1ed2eded3 fd4d5Z*dejd6edejfd7d8Z+dejd1edejfd9d:Z,	;dDdejd<ed1edeej d=eej- d>ed?ede.ejef fd@dAZ/G dBdC dCZ0dS )E    N)ABCabstractmethod)Callable	GeneratorIterableSequence)Anycast)always_wrap_policylambda_auto_wrap_policytransformer_auto_wrap_policysize_based_auto_wrap_policyenable_wrapwrapCustomPolicyModuleWrapPolicyroot_modulefnc                    s<   | hdt jdtdt jdB f fdd  | dd dS )aQ  
    This applies ``fn`` to every module in the module tree of ``root_module``
    following a post-order traversal. If ``fn`` returns an :class:`nn.Module`,
    then this replaces the original module with the newly returned one in the
    tree. Otherwise, ``fn`` should return ``None``, in which case the module is
    not changed.
    modulemodule_nameparent_moduleNc                    s   |   D ]\}}|vr|  |||  q| }|d urNt|tjs0td| d|  |s9td|  t|tjsFtd| t||| d S d S )Nz=Non-root modules should have their parent module set but got z for zTNon-root modules should have their module name set but got an empty module name for z.fn should return None or an nn.Module but got )named_childrenadd
isinstancennModuleAssertionErrorsetattr)r   r   r   child_module_namechild_moduleoptional_module_post_order_apply_innerr   visited_modules e/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/distributed/fsdp/wrap.pyr"   -   s6   
z2_post_order_apply.<locals>._post_order_apply_inner )r   r   str)r   r   r$   r!   r%   _post_order_apply   s   r(   target_module_to_kwargsfsdp_fnreturnc                    s&   dt jdt jdB f fdd}|S )z
    This constructs the "wrap" function to pass to :func:`_post_order_apply`
    based on ``target_module_to_kwargs``, which should be constructed from the
    wrapping policy.
    r   r+   Nc                    s,   | v r| ur|  } | fi |S d S Nr$   )r   kwargsr*   r   r)   r$   r%   r   V   s   z_construct_wrap_fn.<locals>.fn)r   r   )r   r)   r*   r   r$   r.   r%   _construct_wrap_fnK   s   "r/   module_classesignored_modulesroot_kwargsc                 C   sN   t t|}|  D ]}||v rq
t||r$||vr|||< d || d< q
|S )Nmixed_precision)tuplesetmodulesr   )r   r0   r1   r2   r)   module_classes_tupler   r$   r$   r%   $_run_mixed_precision_override_policya   s   
r8   c                  O      dS )z
    A simple recursive wrap policy that always returns ``True``. This means
    that every submodule is wrapped by the wrapper class in
    :func:`_recursive_wrap`.
    Tr$   )argsr-   r$   r$   r%   r
   u   s   r
   c                   @   sP   e Zd ZdZedejdeej dee	e
f deejee	e
f f fddZdS )	_Policyzk
    This defines an abstract base class that represents a policy for applying
    a module-level API.
    r   r1   r2   r+   c                 C   r9   )z
        This should return a dict ``target_module_to_kwargs`` that maps from
        each target module to wrap to its kwargs.
        Nr$   )selfr   r1   r2   r$   r$   r%   _run_policy   s   z_Policy._run_policyN)__name__
__module____qualname____doc__r   r   r   r5   dictr'   r   r=   r$   r$   r$   r%   r;   ~   s    
r;   r   recursenonwrapped_numelc                 C   s   |rdS t | t|S )a   
    This auto wrap policy wraps every module that is an instance of any type in
    ``module_classes`` as its own FSDP instance. The root module given by
    ``module`` is always wrapped as an FSDP instance regardless. Since the
    wrapping proceeds bottom up, each FSDP instance manages the parameters in
    its subtree excluding any already managed by a child FSDP instance.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.
        module_classes (Set[Type[nn.Module]]): Set of module classes that are
            wrapped as FSDP instances.

    Returns:
        ``True`` if ``recurse=True``, and whether ``module`` should be wrapped
        if ``recurse=False``.
    Tr   r4   )r   rC   rD   r0   r$   r$   r%   _module_wrap_policy   s   rF   c                       s   e Zd ZdZdeeej  fddZdejde	ej de
eef de
eje
eef f fd	d
Zdd Zdef fddZ  ZS )r   z{
    This policy applies to every module of the specified module classes,
    passing in the kwargs given to the root.
    r0   c                 C   s   t |}|| _t|| _d S r,   )r5   _module_classesr'   _module_classes_str)r<   r0   module_classes_setr$   r$   r%   __init__   s   zModuleWrapPolicy.__init__r   r1   r2   r+   c                 C   sB   t | j}i }| D ]}||v rqt||rt|||< q|S r,   )r4   rG   r6   r   copy)r<   r   r1   r2   r0   r)   r   r$   r$   r%   r=      s   

zModuleWrapPolicy._run_policyc                 O   s   t ||d| jdS )N)rD   r0   )rF   rG   )r<   r   rC   r:   r-   r$   r$   r%   __call__   s   
zModuleWrapPolicy.__call__c                    s   t   d| j d S )N())super__repr__rH   r<   	__class__r$   r%   rQ      s   zModuleWrapPolicy.__repr__)r>   r?   r@   rA   r   typer   r   rJ   r5   rB   r'   r   r=   rM   rQ   __classcell__r$   r$   rS   r%   r      s    

r   c                   @   sr   e Zd ZdZdeejgeee	e
f B f fddZdejdeej dee	e
f deejee	e
f f fd	d
ZdS )r   a  
    This policy takes in a lambda function that maps a given ``nn.Module`` to
    either ``False``, ``True``, or a kwarg dictionary.
    - If the function returns ``False`` or an empty dictionary, then the module
      does not have the API applied.
    - If the function returns ``True``, then the module has the API applied
      with the root's kwargs.
    - If the function returns a non-empty dictionary, then the module has the
      API applied, and the dictionary overrides the root's kwargs.

    Example::

        >>> # xdoctest: +SKIP("undefined variables")
        >>> model = init_transformer_model(...)
        >>> def lambda_fn(module: nn.Module):
        >>>     if module is model.lm_head:
        >>>         return {"sharding_strategy": ShardingStrategy.SHARD_GRAD_OP}
        >>>     elif isinstance(module, TransformerBlock):
        >>>         return True
        >>>     return False
        >>> policy = CustomPolicy(lambda_fn)
        >>> fsdp_model = FSDP(model, auto_wrap_policy=policy)
    	lambda_fnc                 C   
   || _ d S r,   )
_lambda_fn)r<   rW   r$   r$   r%   rJ         
zCustomPolicy.__init__r   r1   r2   r+   c                 C   sr   i }|  D ]0}||v rq| |}t|ttfs td| |s#qt|}t|tr2|| |||< q|S )Nz_The lambda_fn passed to CustomPolicy should return False/True or a kwarg dict, but it returned )r6   rY   r   rB   bool
ValueErrorrK   update)r<   r   r1   r2   r)   r   resr-   r$   r$   r%   r=      s$   




zCustomPolicy._run_policyN)r>   r?   r@   rA   r   r   r   r[   rB   r'   r   rJ   r5   r=   r$   r$   r$   r%   r      s    &
r   rW   c                 C   s   |rdS || S )aU  
    A convenient auto wrap policy to wrap submodules based on an arbitrary user
    function. If `lambda_fn(submodule) == True``, the submodule will be wrapped as
    a `wrapper_cls` unit.

    Return if a module should be wrapped during auto wrapping.

    The first three parameters are required by :func:`_recursive_wrap`.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        lambda_fn (Callable[[nn.Module], bool]): If this returns ``True``, then
            this module will be wrapped.
    Tr$   )r   rC   rD   rW   r$   r$   r%   r     s   r   transformer_layer_clsc                 C   s   t | |||S )a-  
    See :func:`_module_wrap_policy`, where ``transformer_layer_cls`` is the
    same as ``module_classes``. Note that shared parameters must be wrapped in
    the same FSDP instance, so this auto wrap policy can help wrap shared
    embeddings into the same FSDP instance for transformer models.
    )rF   )r   rC   rD   r_   r$   r$   r%   r   *  s   r   c                 O   s   |rdS t | t|S )NTrE   )r   r0   rC   r:   r-   r$   r$   r%   _wrap_module_cls_individually9  s   r`   c                    s   t  fdd|D S )zv
    A policy that wraps ``module`` if any policy in the passed in iterable of
    ``policies`` returns ``True``.
    c                 3   s    | ]
}| d V  qdS )r   rC   rD   Nr$   ).0policyr   rD   rC   r$   r%   	<genexpr>O  s
    
z_or_policy.<locals>.<genexpr>)any)r   rC   rD   policiesr$   rd   r%   
_or_policyE  s   
rh   g    חAmin_num_paramsforce_leaf_modulesexclude_wrap_modulesc                 C   s\   |du rt jn|}|du rt jn|}|}||k}|r$|o#t| t| S |o-t| t| S )a  
    A size-based auto wrap policy.

    Args:
        module (nn.Module): Current module being considered.
        recurse (bool): If ``False``, then this function must decide whether
            ``module`` should be wrapped as an FSDP instance or not. If
            ``True``, then the function is still recursing down the module
            tree as a part of the DFS.
        nonwrapped_numel (int): Parameter numel not yet wrapped.

        min_num_params (int): Customizable policy input that controls the size
            threshold over which a module is ready to be wrapped. This is in
            units of numel.
        force_leaf_modules (Optional[set[type[nn.Module]]]): Set of module types to keep
            as leaves, i.e. their children will never be wrapped.
        exclude_wrap_modules (Optional[set[type[nn.Module]]]): Set of module types to be
            excluded in wrapping.

    Returns:
        Whether ``module`` should be wrapped.
    N)r   FORCE_LEAF_MODULESEXCLUDE_WRAP_MODULESr   r4   )r   rC   rD   ri   rj   rk   min_nonwrapped_numelis_larger$   r$   r%   r   U  s   !r   wrapper_clswrapper_kwargs)NNNc                 k   sH    d| i|}t di | dV  W d   dS 1 sw   Y  dS )a  
    Context manager to wrap modules using a wrapper.

    Useful for when you'd like to apply the same configuration arguments to all
    child modules that you wrap. A particularly important use case is wrapping
    large layers so that they get sharded (in-place) during initialization, to
    avoid running out of system memory. Large layers can indicate that they
    should be sharded via the ``wrap`` annotation and this context manager can
    provide the exact configuration for these nested instances.

    Usage::

        with enable_wrap(wrapper_cls, **params):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        wrapper_cls:
            Class that `wrap` annotation will `wrap` modules with, such as
            `FullyShardedDataParallel`.
        **wrapper_kwargs:
            Configuration settings that will be passed to all ``wrap``
            instances inside the context
    rp   Nr$   )_ConfigAutoWrap)rp   rq   r-   r$   r$   r%   r     s   "r   wrap_overridesc                 K   s>   t jrt jdu rtdi t j|}t| t jfi |S | S )a  
    Annotate that a module should be wrapped. Annotated modules will only be
    wrapped if inside of an :func:`enable_wrap` context manager. This allows
    a module to be initialized both with and without a wrapper without code
    change.

    The class that this function wraps the passed in ``nn.Module`` with is the
    passed in ``wrapper_cls`` argument into ``enable_wrap``. Both
    ``enable_wrap`` and ``wrap`` can take in kwargs specifying how to construct
    the ``wrapper_cls`` instance. In the case of duplicate kwargs in
    ``enable_wrap`` and ``wrap``, the argument passed into ``wrap`` will be
    respected.

    Usage::

        with enable_wrap(wrapper_cls=FSDP, **fsdp_config):
            # Wraps layer in FSDP by default if within context
            self.l1 = wrap(torch.nn.Linear(5, 5))

    Args:
        module (nn.Module): module to wrap (if in :func:`enable_wrap` context)
        **wrap_overrides: configuration overrides that will take priority over
            the values provided by the :func:`enable_wrap` context
    Nz.Expected _ConfigAutoWrap.wrapper_cls to be set)rr   in_autowrap_contextrp   r   r-   _wrap)r   rs   r$   r$   r%   r     s   
r   c                 K   sH   |d u rt dt| dri || j}|| fi |S || fi |S )NzExpected wrapper_cls to be set_wrap_overrides)r   hasattrrv   )r   rp   r-   	overridesr$   r$   r%   ru     s   
ru   Fauto_wrap_policyignored_paramsonly_wrap_childrenr-   c              	      s@  |du rt d|du rt d|  D ]&\}}||v rqzt|tt|r0t d| d| W q ty:   Y qw t fdd|  D }	|du rPt d|| d	|	d
rd}
|  D ]"\}}||v rfq]t	d|||| d|\}}t
| || |
|7 }
q]|	|
 }|s|| d|d
rt| |fi ||	fS | |
fS | dfS )a  
    Wraps submodules of ``module`` for which ``auto_wrap_policy`` returns
    ``True`` with ``wrapper_cls``.

    Args:
        module (nn.Module): Module to recursively wrap.
        auto_wrap_policy (Callable): A callable representing a policy that
            determines which modules to recursively wrap with ``wrapper_cls``.
        ignored_modules (set[torch.nn.Module]): Modules to ignore when
            wrapping.
        ignored_params (set[torch.nn.Parameter]): Parameters to ignore when
            wrapping; these should be the parameters contained in the modules
            in ``ignored_modules``.
    Returns:
        (nn.Module, int):
            ``module`` after wrapping and the numel recursively wrapped.
    NzMust specify auto_wrap_policy.zMust specify wrapper_clszChild module z is already wrapped by c                 3   s     | ]}| vr|  V  qd S r,   )numel)rb   prz   r$   r%   re     s    z"_recursive_wrap.<locals>.<genexpr>z#Expected auto_wrap_policy to be setTra   r   )r   ry   rp   r1   rz   Fr$   )r   named_modulesr   r	   rU   	TypeErrorsum
parametersr   _recursive_wrapr   ru   )r   ry   rp   r1   rz   r{   r-   _childrD   total_wrapped_numelnamewrapped_childnum_wrapped_params	remainderr$   r~   r%   r     sX   

r   c                   @   s   e Zd ZU dZdZeed< dZedB ed< i Z	e
eef ed< de
eef fddZeded	dfd
dZedddZdddZdededed	dfddZdS )rr   z
    Helper class to wrap modules based on default config args via a context manager.
    See :func:`enable_wrap` for more information.
    Frt   Nrp   r-   c                 K   rX   r,   r-   )r<   r-   r$   r$   r%   rJ   B  rZ   z_ConfigAutoWrap.__init__r+   c                 C   sD   t jrtddt _d| vrtdtt| d t _| d= | t _d S )Nz]You are already within an autowrap context and we currently do not supported nested autowrap.Trp   z9Expected to pass in wrapper_cls arg into _ConfigAutoWrap.)rr   rt   NotImplementedErrorr   r	   r   rp   r-   r   r$   r$   r%   enable_autowrap_contextE  s   
z'_ConfigAutoWrap.enable_autowrap_contextc                   C   s   dt _d t _i t _d S )NF)rr   rt   rp   r-   r$   r$   r$   r%   disable_autowrap_contextV  s   
z(_ConfigAutoWrap.disable_autowrap_contextc                 C   s   |  | j d S r,   )r   r-   rR   r$   r$   r%   	__enter__\  s   z_ConfigAutoWrap.__enter__exc_typeexc_valexc_tbc                 C   s   |    d S r,   )r   )r<   r   r   r   r$   r$   r%   __exit___  s   z_ConfigAutoWrap.__exit__)r+   N)r>   r?   r@   rA   rt   r[   __annotations__rp   r   r-   rB   r'   r   rJ   staticmethodr   r   r   r   r$   r$   r$   r%   rr   8  s   
 
rr   )F)1
contextlibrK   abcr   r   collections.abcr   r   r   r   typingr   r	   torch.nnr   __all__r   r(   rB   r'   r/   rU   r5   r8   r[   r
   r;   intrF   r   r   r   r   r`   rh   r   
ModuleList
ModuleDictrm   MultiheadAttentionrl   contextmanagerr   r   ru   	Parameterr4   r   rr   r$   r$   r$   r%   <module>   s  
,


	
 %7




7
#&
P