o
    kiR                     @   s   U d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 G dd deZ
G dd	 d	eZG d
d deZG dd deZdaeejdB  dB ed< dejfddZdS )    N)chain_get_device_index)Function)commc                   @   $   e Zd Zedd Zedd ZdS )	Broadcastc           	         s   t dd |D stddd |D }|| _t|dkrdS t|| _|d  | _dd |D | _t	|| j}|D ]}t
| jD ]\}}|rRt|| ||< qCq<g }t
| jd	d  D ]\ }|sq| fd
d|D  q_| j|  tt|S )Nc                 s       | ]	}|j jd kV  qdS cpuNdevicetype.0i r   f/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torch/nn/parallel/_functions.py	<genexpr>       z$Broadcast.forward.<locals>.<genexpr>z2Broadcast function not implemented for CPU tensorsc                 S      g | ]}t |d qS Tr   r   xr   r   r   
<listcomp>       z%Broadcast.forward.<locals>.<listcomp>r   r   c                 S   s   g | ]}|  qS r   
is_complex)r   inpr   r   r   r      s       c                 3   s    | ]}|  V  qd S Nr   )r   outputidxr   r   r   "       )allAssertionErrortarget_gpuslen
num_inputs
get_deviceinput_devicecomplex_maskr   broadcast_coalesced	enumeratetorchview_as_complexneeds_input_gradextendmark_non_differentiabletupler   from_iterable)	ctxr'   inputsoutputsdevice_outputsr   r   non_differentiablesinput_requires_gradr   r"   r   forward   s.   

zBroadcast.forwardc                 G   s    t j| j| jg|R  }d| S )Nr    )ReduceAddCoalescedapplyr+   r)   )r6   grad_outputsgradsr   r   r   backward&   s   zBroadcast.backwardN__name__
__module____qualname__staticmethodr<   rA   r   r   r   r   r   
   s
    
r   c                   @   r   )r=   c                    s    fddt dt D | _ fddt D }|| _tdd  D fddt dtD }t||}tdd t||D }|S )	Nc                       g | ]} |   qS r   r*   r   r@   r   r   r   2   s    z.ReduceAddCoalesced.forward.<locals>.<listcomp>r   c                    rG   r   r   r   rI   r   r   r   6   s    c                 s   s&    | ]}|  rt|n|V  qd S r    )r   r/   view_as_realr   gr   r   r   r   9   s    
z-ReduceAddCoalesced.forward.<locals>.<genexpr>c                    s   g | ]
} ||  qS r   r   r   )grads_convertedr)   r   r   r   =   s    c                 s   s&    | ]\}}|rt |n|V  qd S r    r/   r0   )r   rr   r   r   r   r   C   s
    
)ranger(   r'   r,   r4   r   reduce_add_coalescedzip)r6   destinationr)   r@   r,   grads_resultsr   )r@   rM   r)   r   r<   0   s    
zReduceAddCoalesced.forwardc                 G   s   dt j| jg|R   S )NNN)r   r>   r'   )r6   r?   r   r   r   rA   J   s   zReduceAddCoalesced.backwardNrB   r   r   r   r   r=   /   
    
r=   c                   @   r   )Gatherc                    s   t dd |D std|dkrd _nt|d}| _| _tdd |D  _t dd |D rK|dkrKtd	d |D }tjd
dd d _	nd _	t fdd|D  _
t|dkoe|d  }t| j j}|rvt|}|S )Nc                 s   r	   r
   r   r   r   r   r   r   U   r   z!Gather.forward.<locals>.<genexpr>z/Gather function not implemented for CPU tensorsr   Tc                 s   s    | ]}|  V  qd S r    rH   r   r   r   r   r   ]   r$   c                 s   s    | ]	}|  d kV  qdS r   N)dimr   tr   r   r   r   ^   r   r   c                 s   s    | ]}| d V  qdS )r   N)viewr[   r   r   r   r   _       zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc                 3   s    | ]	}|  jV  qd S r    )sizerZ   r   r6   r   r   r   i   r   )r%   r&   target_devicer   rZ   r4   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr(   r   r   gatherr/   r0   )r6   rc   rZ   r7   r   r!   r   rb   r   r<   S   s,   

zGather.forwardc                 C   s6   t | j| j| j|}| jrtdd |D }d| S )Nc                 s   s    | ]}|d  V  qdS rY   r   rK   r   r   r   r   z   r$   z"Gather.backward.<locals>.<genexpr>rV   )Scatterr>   rd   rh   rZ   rg   r4   )r6   grad_outputscattered_gradsr   r   r   rA   t   s   zGather.backwardNrB   r   r   r   r   rX   R   s
    
 rX   c                   @   r   )rj   c              	   C   s   dd |D }|| _ |jjdkr| nd| _d }tj r*| jdkr*dd |D }| }t	
|||| j |}|rCtdd |D }|d uryt|D ]-\}}	tj||  tj }
|
||  |	|
 W d    n1 ssw   Y  qK|S )Nc                 S   r   r   r   r   r   r   r   r      r   z#Scatter.forward.<locals>.<listcomp>r   c                 S   s   g | ]	}t t|qS r   )_get_streamr/   r   )r   r   r   r   r   r      s    c                 s   s    | ]}t |V  qd S r    rN   )r   or   r   r   r      r^   z"Scatter.forward.<locals>.<genexpr>)rZ   r   r   r*   r+   r/   acceleratoris_availabler   r   scatterr4   r.   device_indexcurrent_streamwait_streamrecord_stream)r6   r'   chunk_sizesrZ   inputstreamsr   r8   r   r!   main_streamr   r   r   r<      s&   
zScatter.forwardc                 G   s    d d d t j| j| jg|R  fS r    )rX   r>   r+   rZ   )r6   rk   r   r   r   rA      s    zScatter.backwardNrB   r   r   r   r   rj   ~   rW   rj   _streamsr   c                 C   s   | j dks
tj sdS tj j | j kr$tdtj j  d| j  tdu r0dgtj  at| j du r@t	| jt| j< t| j S )zBGet a background stream for copying between CPU and target device.r   Nz"Expected current accelerator type z to match device type )
r   r/   rp   rq   current_acceleratorr&   r{   device_countindexStream)r   r   r   r   rn      s   
rn   )re   	itertoolsr   r/   torch._utilsr   torch.autogradr   torch.nn.parallelr   r   r=   rX   rj   r{   listr   __annotations__r   rn   r   r   r   r   <module>   s   
 %#,!