
    rh                     @   d Z ddlmZmZ ddlmZ ddlmZmZm	Z	 ddl
mZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZ  e       r e       rddlmZ nddlmZ  e       rddlZ e       rddlm Z   G d de      Z! ed       G d de             Z"dgZ#y)z(Fast Video processor class for InternVL.    )OptionalUnion   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDSizeDict)UnpackVideosKwargs)
TensorTypeis_torch_availableis_torchvision_availableis_torchvision_v2_availableis_vision_available)requires)BaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videos)
functionalN)PILImageResamplingc                   $    e Zd ZU eeeef   ed<   y) InternVLVideoProcessorInitKwargsinitial_shiftN)__name__
__module____qualname__r   boolfloatint__annotations__     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/internvl/video_processing_internvl.pyr   r   4   s    uc)**r#   r   )torchvision)backendsc            -           e Zd Zej                  ZeZeZ	dddZ
dZdZdZdZdZdZeZdgZdee   f fdZ	 	 	 	 d$dd	d
eeeef      dee   deeeef      deeeeef      f
dZ	 	 	 	 	 	 d%ded	   deee   ee   f   dedededee   ded   dededededededeeeee   f      deeeee   f      dee   deeeef      dee   deeeeef      d eee e!f      d!ed	   d"e"f,d#Z# xZ$S )&InternVLVideoProcessori  )heightwidthTFpixel_values_videoskwargsc                 $    t        |   di | y )Nr"   )super__init__)selfr,   	__class__s     r$   r/   zInternVLVideoProcessor.__init__G   s    "6"r#   videoztorch.Tensormetadata
num_framesfpsr   c                 j   ||n| j                   }||n| j                  }|j                  d   }|#|!|t        d      t	        ||d   z  |z        }|du r||z  dz  }||kD  rt        d| d| d      t        j                  ||||z        j	                         }||   j                         }|S )	aj  
        Default sampling function which uniformly samples the desired number of frames between 0 and total number of frames.
        If `fps` is passed along with metadata, `fps` frames per second are sampled uniformty. Arguments `num_frames`
        and `fps` are mutually exclusive.

        Args:
            video (`torch.Tensor`):
                Video that need to be sampled.
            metadata (`VideoMetadata`, *optional*):
                Metadata of the video containing information about total duration, fps and total number of frames.
            num_frames (`int`, *optional*):
                Maximum number of frames to sample. Defaults to `self.num_frames`.
            fps (`int` or `float`, *optional*):
                Target frames to sample per second. Defaults to `self.fps`.
            initial_shift (`bool`, `float` or `int`, defaults to `self.initial_shift`):
                The initial shift to apply when sampling frames. If `True`, the shift is set so that frames are sampled from the middle of the video.

        Returns:
            torch.Tensor:
                Sampled video frames.
        r   zAsked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. Please pass in `VideoMetadata` object or use a fixed `num_frames` per input videor5   T   z(Video can't be sampled. The `num_frames=z` exceeds `total_num_frames=z`. )r4   r   shape
ValueErrorr    torcharange
contiguous)r0   r2   r3   r4   r5   r   total_num_framesindicess           r$   sample_framesz$InternVLVideoProcessor.sample_framesJ   s    : $.#9Zt
)6)BHZHZ ;;q> #/ h  -?#EFJD ,z9A=M((::,Fbcsbttwx  ,,}.>@PS]@]^bbdg))+r#   videosvideo_metadatado_convert_rgb	do_resizesizesize_divisorinterpolationzF.InterpolationModedo_center_crop	crop_size
do_rescaledo_padrescale_factordo_normalize
image_mean	image_stddo_sample_framesreturn_tensorsdevicereturnc                    |r2t        ||      D cg c]  \  }}| j                  |||||       }}}||D cg c]  }|j                  |       }}t        |      \  }}i }|j	                         D ]4  \  }}|r| j                  |      }|r| j                  ||||      }|||<   6 t        ||      }t        |      \  }}i }|j	                         D ]4  \  }}|r| j                  ||	      }| j                  ||
||||      }|||<   6 t        ||      }|rt        j                  |d      n|}t        d|i|      S c c}}w c c}w )N)r5   r4   r   )rD   rE   rF   r   )dimr+   )datatensor_type)zipr?   tor   itemsconvert_to_rgbresizer   center_croprescale_and_normalizer:   stackr   ) r0   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   r5   r4   r   rP   rQ   r2   r3   grouped_videosgrouped_videos_indexresized_videos_groupedr8   stacked_videosresized_videosprocessed_videos_groupedprocessed_videoss                                    r$   _preprocessz"InternVLVideoProcessor._preprocess   s   0  (+6>'B#E8 ""5(
bo"pF  4:;5ehhv&;F; 0EV/L,,!#%3%9%9%; 	;!E>!%!4!4^!D!%"LXe "- " -;"5)	; ((>@TU 0E^/T,,#% %3%9%9%; 	=!E>!%!1!1.)!L!77
NL*V_N /=$U+	= **BDXYCQ5;;'7Q?Wg"79I!JXfggO <s
    EE)NNNN)NNNNNN)%r   r   r   r   BICUBICresampler   rM   r   rN   rD   rC   rI   rL   rB   r   rO   r   valid_kwargsmodel_input_namesr
   r/   r   r   r   dictr    r   r   r?   listr	   strr   r   rf   __classcell__)r1   s   @r$   r(   r(   8   sz   !))H!JIC(DIJLNM3L./#(H!I # :>$(+/;?44 5!4564 SM	4
 eCJ'(4  dE3&6 784N ,0+/$(;?;?+/-Ah^$Ah d=14:=>Ah 	Ah
 Ah Ah smAh   56Ah Ah Ah Ah Ah Ah Ah U5$u+#567Ah  E%e"456!Ah" #4.#Ah$ eCJ'(%Ah& SM'Ah(  dE3&6 78)Ah* !sJ!78+Ah, (-Ah. 
/Ahr#   r(   )$__doc__typingr   r   image_processing_utilsr   image_utilsr   r   r	   processing_utilsr
   r   utilsr   r   r   r   r   utils.import_utilsr   video_processing_utilsr   video_utilsr   r   r   torchvision.transforms.v2r   Ftorchvision.transformsr:   r   r   r(   __all__r"   r#   r$   <module>r|      s    / " 2 
 5  + 8 O O "$=: 1+| + 
#$Hh/ Hh %HhV $
$r#   