from typing import Optional

import torch
import torch.optim._functional as F
from torch import Tensor
from torch.distributed.optim._deprecation_warning import (
    _scripted_functional_optimizer_deprecation_warning,
)

__all__: list[str] = []


# Define a TorchScript compatible functional SGD optimizer, where we use
# the optimizer in a functional way. Instead of reading `param.grad` when
# updating parameters, we explicitly let the distributed optimizer pass
# gradients to the `step` function. This separates gradients from
# parameters and lets a multithreaded trainer update the parameters
# without data races on accumulating into the same `.grad`.
# NOTE: This should only be used by distributed optimizer internals and
# is not meant to be exposed to the user.
@torch.jit.script
class _FunctionalSGD:
    def __init__(
        self,
        params: list[Tensor],
        lr: float = 1e-2,
        momentum: float = 0.0,
        dampening: float = 0.0,
        weight_decay: float = 0.0,
        nesterov: bool = False,
        maximize: bool = False,
        foreach: bool = False,
        fused: bool = False,
        _allow_empty_param_list: bool = False,
    ):
        _scripted_functional_optimizer_deprecation_warning(stacklevel=2)
        self.defaults = {
            "lr": lr,
            "momentum": momentum,
            "dampening": dampening,
            "weight_decay": weight_decay,
        }
        self.nesterov = nesterov
        self.maximize = maximize
        self.foreach = foreach
        self.fused = fused
        self.state = torch.jit.annotate(dict[torch.Tensor, dict[str, torch.Tensor]], {})

        if len(params) == 0 and not _allow_empty_param_list:
            raise ValueError("optimizer got an empty parameter list")

        # NOTE: there is only one param_group; adding additional param
        # groups is not supported, as it is not a common use case here.
        self.param_group = {"params": params}

    def step_param(self, param: Tensor, grad: Optional[Tensor]):
        """Similar to self.step, but operates on a single parameter and
        its gradient.
        """
        weight_decay = self.defaults["weight_decay"]
        momentum = self.defaults["momentum"]
        dampening = self.defaults["dampening"]
        lr = self.defaults["lr"]
        params = [param]
        momentum_buffer_list: list[Optional[Tensor]] = []
        grads = []

        has_sparse_grad = False
        if grad is not None:
            grads.append(grad)
            if grad.is_sparse:
                has_sparse_grad = True
            if param not in self.state:
                self.state[param] = {}
            state = self.state[param]
            if "momentum_buffer" not in state:
                momentum_buffer_list.append(None)
            else:
                momentum_buffer_list.append(state["momentum_buffer"])

        with torch.no_grad():
            F.sgd(
                params,
                grads,
                momentum_buffer_list,
                weight_decay=weight_decay,
                momentum=momentum,
                lr=lr,
                dampening=dampening,
                nesterov=self.nesterov,
                maximize=self.maximize,
                has_sparse_grad=has_sparse_grad,
                foreach=self.foreach,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )

        # Write the (possibly newly created) momentum buffer back to state.
        state = self.state[param]
        momentum_buffer = momentum_buffer_list[0]
        if momentum_buffer is not None:
            state["momentum_buffer"] = momentum_buffer

    def step(self, gradients: list[Optional[Tensor]]):
        params = self.param_group["params"]
        params_with_grad = []
        grads = []
        momentum_buffer_list: list[Optional[Tensor]] = []
        lr = self.defaults["lr"]
        weight_decay = self.defaults["weight_decay"]
        momentum = self.defaults["momentum"]
        dampening = self.defaults["dampening"]

        if len(params) != len(gradients):
            raise ValueError(
                "the gradients passed in does not equal to the size of the parameters!"
                + f"Params length: {len(params)}. "
                + f"Gradients length: {len(gradients)}"
            )

        has_sparse_grad = False
        for param, gradient in zip(params, gradients):
            if gradient is not None:
                params_with_grad.append(param)
                grads.append(gradient)
                if gradient.is_sparse:
                    has_sparse_grad = True

                if param not in self.state:
                    self.state[param] = {}

                state = self.state[param]
                if "momentum_buffer" not in state:
                    momentum_buffer_list.append(None)
                else:
                    momentum_buffer_list.append(state["momentum_buffer"])

        with torch.no_grad():
            F.sgd(
                params_with_grad,
                grads,
                momentum_buffer_list,
                weight_decay=weight_decay,
                momentum=momentum,
                lr=lr,
                dampening=dampening,
                nesterov=self.nesterov,
                maximize=self.maximize,
                has_sparse_grad=has_sparse_grad,
                foreach=self.foreach,
                fused=self.fused,
                grad_scale=None,
                found_inf=None,
            )

        # Write the (possibly newly created) momentum buffers back to state.
        for i, p in enumerate(params_with_grad):
            state = self.state[p]
            momentum_buffer = momentum_buffer_list[i]
            if momentum_buffer is not None:
                state["momentum_buffer"] = momentum_buffer

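

# ---------------------------------------------------------------------------
# Reference sketch (added for clarity; not part of the original module).
# A minimal eager-mode re-implementation of the per-parameter update that
# `F.sgd` performs above, mirroring the documented `torch.optim.SGD`
# algorithm. The function name `_sgd_update_reference` is hypothetical.
# ---------------------------------------------------------------------------
def _sgd_update_reference(
    param: Tensor,
    grad: Tensor,
    buf: Optional[Tensor],
    lr: float,
    momentum: float,
    dampening: float,
    weight_decay: float,
    nesterov: bool,
    maximize: bool,
) -> Optional[Tensor]:
    """Update ``param`` in place; return the (possibly new) momentum buffer."""
    with torch.no_grad():
        if maximize:
            grad = grad.neg()  # gradient ascent instead of descent
        if weight_decay != 0.0:
            grad = grad.add(param, alpha=weight_decay)  # L2 penalty folded into the gradient
        if momentum != 0.0:
            if buf is None:
                buf = grad.clone().detach()  # first step: buffer starts at the raw gradient
            else:
                buf.mul_(momentum).add_(grad, alpha=1.0 - dampening)
            grad = grad.add(buf, alpha=momentum) if nesterov else buf
        param.add_(grad, alpha=-lr)  # descent step
    return buf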
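

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). Gradients
# are handed to `step` explicitly instead of being read from `param.grad`,
# which is what makes this optimizer "functional". The names `w`, `g`, and
# `opt` are hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    w = torch.randn(3, 3, requires_grad=True)
    opt = _FunctionalSGD([w], lr=0.1, momentum=0.9)

    loss = (w * w).sum()
    # Compute gradients explicitly rather than via loss.backward() + w.grad.
    (g,) = torch.autograd.grad(loss, [w])

    opt.step([g])  # update all parameters from a list of gradients
    opt.step_param(w, g)  # or update a single parameter at a time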