o
    oi;                     @   s   d dl Z d dlZd dlZd dlmZmZmZmZ d dlZddl	m
Z
 edZdd Zddejd	ed
ededejf
ddZG dd dZdedefddZG dd dZdS )    N)AnyOptionalTypeVarUnion   )tqdmTc                  C   s&   zdd l } W | S  ty   tdw )Nr   zVideo decoding capabilities were removed from torchvision and migrated to TorchCodec. Please install TorchCodec following instructions at https://github.com/pytorch/torchcodec#installing-torchcodec)
torchcodecImportError)r	    r   j/var/www/addictedbytheproject.nl/epg/venv/lib/python3.10/site-packages/torchvision/datasets/video_utils.py_get_torchcodec   s   
r   tensorsizestepdilationreturnc                 C   s   |   dkrtd|    | d}|  }|| || f}|||d  d  | d |f}|d dk r:d|f}t| ||S )a   
    similar to tensor.unfold, but with the dilation
    and specialized for 1d tensors

    Returns all consecutive windows of `size` elements, with
    `step` between windows. The distance between each element
    in a window is given by `dilation`.
    r   z*tensor should have 1 dimension instead of r   )dim
ValueErrorstridenumeltorch
as_strided)r   r   r   r   o_strider   
new_stridenew_sizer   r   r   unfold   s   	
 r   c                   @   sV   e Zd ZdZdee ddfddZdefddZd	ede	ee e
e f fd
dZdS )_VideoTimestampsDatasetz
    Dataset used to parallelize the reading of the timestamps
    of a list of videos, given their paths in the filesystem.

    Used in VideoClips and defined at top level, so it can be
    pickled when forking.
    video_pathsr   Nc                 C   s
   || _ d S Nr   )selfr   r   r   r   __init__6      
z _VideoTimestampsDataset.__init__c                 C   
   t | jS r   lenr   r!   r   r   r   __len__9   r#   z_VideoTimestampsDataset.__len__idxc                 C   s8   t  }|j| j| }|jj}|jj}tt||fS r   )	r   decodersVideoDecoderr   metadata
num_framesaverage_fpslistrange)r!   r)   r	   decoderr-   fpsr   r   r   __getitem__<   s
   z#_VideoTimestampsDataset.__getitem__)__name__
__module____qualname____doc__r/   strr"   intr(   tupler   floatr3   r   r   r   r   r   -   s
    &r   xc                 C   s   | S )zH
    Dummy collate function to be used with _VideoTimestampsDataset
    r   )r<   r   r   r   _collate_fnD   s   r=   c                   @   s  e Zd ZdZ												d=dee ded	ed
ee dee	ee
f  dededededededededdfddZd>ddZde	ee
f ddfddZede	ee
f fddZdee dd fdd Ze	d?d!ejd"ed#ed$ee d
ee deejeee ejf f fd%d&Zd?d"ed#ed
ee ddfd'd(Zdefd)d*Zdefd+d,Zdefd-d.Zd/edeeef fd0d1Zed"ed2ed3edeeejf fd4d5Zd/edeejeje	ee
f ef fd6d7Zde	ee
f fd8d9Zd:e	ee
f ddfd;d<Z dS )@
VideoClipsaE  
    Given a list of video files, computes all consecutive subvideos of size
    `clip_length_in_frames`, where the distance between each subvideo in the
    same video is defined by `frames_between_clips`.
    If `frame_rate` is specified, it will also resample all the videos to have
    the same frame rate, and the clips will refer to this frame rate.

    Creating this instance the first time is time-consuming, as it needs to
    decode all the videos in `video_paths`. It is recommended that you
    cache the results after instantiation of the class.

    Recreating the clips for different clip lengths is fast, and can be done
    with the `compute_clips` method.

    Args:
        video_paths (List[str]): paths to the video files
        clip_length_in_frames (int): size of a clip in number of frames
        frames_between_clips (int): step (in frames) between each clip
        frame_rate (float, optional): if specified, it will resample the video
            so that it has `frame_rate`, and then the clips will be defined
            on the resampled video
        num_workers (int): how many subprocesses to use for data loading.
            0 means that the data will be loaded in the main process. (default: 0)
        output_format (str): The format of the output video tensors. Can be either "THWC" (default) or "TCHW".
       r   Nr   THWCr   clip_length_in_framesframes_between_clips
frame_rate_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_video_max_dimension_audio_samples_audio_channelsoutput_formatr   c                 C   s   || _ || _|| _|| _|	| _|
| _|| _|| _| | _	| j	dvr*t
d| d|d u r3|   n| | | ||| d S )N)r@   TCHWz5output_format should be either 'THWC' or 'TCHW', got .)r   rE   rF   rG   rH   rI   rJ   rK   upperrL   r   _compute_frame_pts_init_from_metadatacompute_clips)r!   r   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r   r   r   r"   f   s   



zVideoClips.__init__c                    s   g | _ g | _dd l  jjjt| jd| jt	d}t
t|d0}|D ]$}|d tt| \}} fdd|D }| j | | j| q#W d    d S 1 sSw   Y  d S )Nr   r?   )
batch_sizerE   
collate_fn)totalr   c                    s   g | ]
} j | jd qS )dtype)	as_tensorlong).0ptsr   r   r   
<listcomp>   s    z1VideoClips._compute_frame_pts.<locals>.<listcomp>)	video_pts	video_fpstorch.utils.datautilsdata
DataLoaderr   r   rE   r=   r   r&   updater/   zipextend)r!   dlpbarbatch	batch_pts	batch_fpsr   r\   r   rP      s$   
"zVideoClips._compute_frame_ptsr,   c                 C   sV   |d | _ t| j t|d ksJ |d | _t| j t|d ks$J |d | _d S )Nr   r^   r_   )r   r&   r^   r_   )r!   r,   r   r   r   rQ      s
   

zVideoClips._init_from_metadatac                 C   s   | j | j| jd}|S )Nr   r^   r_   rl   )r!   	_metadatar   r   r   r,      s
   zVideoClips.metadataindicesc                    s~    fdd|D } fdd|D } fdd|D }|||d}t  | j j j| j j j j j j	 j
 jdS )Nc                       g | ]} j | qS r   r    rZ   ir'   r   r   r]          z%VideoClips.subset.<locals>.<listcomp>c                    ro   r   )r^   rp   r'   r   r   r]      rr   c                    ro   r   )r_   rp   r'   r   r   r]      rr   rl   )rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   )typer-   r   rC   rE   rF   rG   rH   rI   rJ   rK   rL   )r!   rn   r   r^   r_   r,   r   r'   r   subset   s,   zVideoClips.subsetr^   r-   r   r2   c           	      C   s   |d u rd}|d u r|}t | | | }ttt|||}| | } t| ||}| s3t	d t
|trC|gt | }||fS t|||}||fS )Nr   zThere aren't enough frames in the current video to get a clip for the given clip length and frames between clips. The video (and potentially others) will be skipped.)r&   r>   _resample_video_idxr9   mathfloorr   r   warningswarn
isinstanceslice)	r^   r-   r   r2   rC   total_frames_idxsclipsidxsr   r   r   compute_clips_for_video   s"   
z"VideoClips.compute_clips_for_videoc           	      C   s   || _ || _|| _g | _g | _t| j| jD ]\}}| |||||\}}| j	| | j	| qt
dd | jD }|d | _dS )a  
        Compute all consecutive sequences of clips from video_pts.
        Always returns clips of size `num_frames`, meaning that the
        last few frames in a video can potentially be dropped.

        Args:
            num_frames (int): number of frames for the clip
            step (int): distance between two clips
            frame_rate (int, optional): The frame rate
        c                 S      g | ]}t |qS r   r&   rZ   vr   r   r   r]          z,VideoClips.compute_clips.<locals>.<listcomp>r   N)r-   r   rC   r~   resampling_idxsre   r^   r_   r   appendr   rX   cumsumtolistcumulative_sizes)	r!   r-   r   rC   r^   r2   r~   r   clip_lengthsr   r   r   rR      s   zVideoClips.compute_clipsc                 C   s   |   S r   )	num_clipsr'   r   r   r   r(      s   zVideoClips.__len__c                 C   r$   r   r%   r'   r   r   r   
num_videos  r#   zVideoClips.num_videosc                 C   s
   | j d S )zJ
        Number of subclips that are available in the video list.
        )r   r'   r   r   r   r     s   
zVideoClips.num_clipsr)   c                 C   s<   t | j|}|dkr|}||fS || j|d   }||fS )zw
        Converts a flattened representation of the indices into a video_idx, clip_idx
        representation.
        r   r   )bisectbisect_rightr   )r!   r)   	video_idxclip_idxr   r   r   get_clip_location  s   zVideoClips.get_clip_locationoriginal_fpsnew_fpsc                 C   sL   || }|  rt|}td d |S tj| tjd| }| tj}|S )NrV   )	
is_integerr9   r{   r   arangefloat32rw   toint64)r-   r   r   r   r   r   r   r   ru     s   zVideoClips._resample_video_idxc                 C   s  ||   krtd| d|    d| |\}}| j| }| j| | }t|d  }t|d  }t }| jdkr@dnd}	|j	j
||	d	}
|
jtt||d
 dj}|
jj}|| }|d
 | }z|j	|}|j||d}|j}W n ty   tjdtjd}Y nw d|i}| jdur| j| | }t|tjr||d  }|| }| j|d< t|| jksJ |j d| j ||||fS )a7  
        Gets a subclip from a list of videos.

        Args:
            idx (int): index of the subclip. Must be between 0 and num_clips().

        Returns:
            video (Tensor)
            audio (Tensor)
            info (Dict)
            video_idx (int): index of the video in `video_paths`
        zIndex z out of range (z number of clips)r   r   r@   NHWCNCHW)dimension_orderr   )rn   )start_secondsstop_seconds)r   r   rV   r_   Nz x )r   
IndexErrorr   r   r~   r9   itemr   rL   r*   r+   get_frames_atr/   r0   rb   r,   r.   AudioDecoderget_samples_played_in_range	Exceptionr   emptyr   rC   r   rz   Tensorr&   r-   shape)r!   r)   r   r   
video_pathclip_pts	start_idxend_idxr	   r   r1   videor2   	start_secend_secaudio_decoderaudio_samplesaudioinforesampling_idxr   r   r   get_clip#  s<   



$zVideoClips.get_clipc                 C   sn   dd | j D }dd | j D }|rt|}| }| j }||d< ||d< |d= |d= |d= d	|d
< |S )Nc                 S   r   r   r   r   r   r   r   r]   W  r   z+VideoClips.__getstate__.<locals>.<listcomp>c                 S   s   g | ]}| tjqS r   )r   r   r   )rZ   r<   r   r   r   r]   \  s    video_pts_sizesr^   r~   r   r      _version)r^   r   catnumpy__dict__copy)r!   r   r^   dr   r   r   __getstate__V  s   

zVideoClips.__getstate__r   c                 C   sf   d|vr	|| _ d S tj|d tjd}tj||d dd}|d= ||d< || _ | | j| j| j d S )Nr   r^   rV   r   r   )r   )	r   r   rX   r   splitrR   r-   r   rC   )r!   r   r^   r   r   r   __setstate__r  s   zVideoClips.__setstate__)r?   r   NNr   r   r   r   r   r   r   r@   )r   Nr   )!r4   r5   r6   r7   r/   r8   r9   r   r;   dictr   r"   rP   rQ   propertyr,   rt   staticmethodr   r   r:   r   r{   r   rR   r(   r   r   r   ru   r   r   r   r   r   r   r   r>   K   s    	


% &*3r>   )r   )r   rv   rx   typingr   r   r   r   r   ra   r   r   r   r   r9   r   r   r=   r>   r   r   r   r   <module>   s    $