
    rh'                     H   d Z ddlZddlmZmZ ddlZddlmZ ddl	m
Z
mZmZmZmZ ddlmZmZ ddlmZmZmZmZ d	d
lmZ  e       rddlZddlmZ ddlmZmZ ddlm Z m!Z!m"Z"  e       rddl	m#Z#  G d de      Z$ eded       ed       G d de                    Z%dgZ&y)z#video processor class for GLM-4.1V.    N)OptionalUnion   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimensionSizeDictget_image_size)UnpackVideosKwargs)
TensorTypeadd_start_docstringsis_torch_availableis_vision_available   )smart_resize)requires)BASE_VIDEO_PROCESSOR_DOCSTRINGBaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videos)PILImageResamplingc                       e Zd ZU dZeeef   ed<   dZe	e   ed<   dZ
e	e   ed<   dZe	e   ed<   dZe	ee      ed<   dZe	ee      ed<   y)Glm4vVideoProcessorInitKwargsNmax_image_size
patch_sizetemporal_patch_size
merge_size
image_mean	image_std)__name__
__module____qualname__r   dictstrint__annotations__r   r   r   r    r!   listfloatr"        /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/glm4v/video_processing_glm4v.pyr   r   9   se    %)NDcN) $J$)-#- $J$(,Je%,'+IxU$+r-   r   zfConstructs a fast GLM-4V image processor that dynamically resizes videos based on the original videos.aj  
        patch_size (`int`, *optional*, defaults to 14):
            The spacial patch size of the vision encoder.
        temporal_patch_size (`int`, *optional*, defaults to 2):
            The temporal patch size of the vision encoder.
        merge_size (`int`, *optional*, defaults to 2):
            The merge size of the vision encoder to llm encoder.
    )torchvision)backendsc                        e Zd Zej                  ZdddZddiZeZ	e
ZdZdZdZdZdZdZdZdZdZeZd	ZdZd
dgZdee   f fdZdej:                  deee f   fdZ!ddej                  ddddddddddfde"ej:                     de#ee"e   e"e    f      de$dede$de%de$de$de#ee%e"e%   f      de#ee%e"e%   f      de#e&   de#e&   de#e&   d e#ee'e(f      fd!Z) xZ*S )"Glm4vVideoProcessori 1  i )shortest_edgelongest_edger4   T      i,     pixel_values_videosvideo_grid_thwkwargsc                 $    t        |   di | y )Nr,   )super__init__)selfr:   	__class__s     r.   r=   zGlm4vVideoProcessor.__init__d   s    "6"r-   videometadatac                    |j                   d   }t        |dd      }t        |d|      }|dz
  }t        |dd       }|t        ||z        dz   }|| j                  k  rzt	        t        j                  || j                  z              }t        |      D 	cg c]:  }	t        |t	        t        j                  |	|z  | j                  z                    < }
}	nt	        | j                  | j                  z        }||k\  rt        t        |            }
nQt        j                  d||d      }|D cg c]-  }t        |t	        t        j                  ||z                    / }
}t               g }}|
D ])  }||vs|j                  |       |j!                  |       + t#        |      dz  r|j!                  |d	          |}
||
   }|
D cg c]  }t	        ||z         }}|d d d
   }||fS c c}	w c c}w c c}w )Nr   fpsg       @total_num_framesr   durationT)endpointr6   )shapegetattrroundmax_durationr(   mathfloorrC   rangeminceilr*   nplinspacesetaddappendlen)r>   r@   rA   total_frames	video_fpsmeta_framesmax_frame_idxrE   niframe_indicesnum_samplestarget_secondstseenuniqidxsampled_videofull_second_idxssecond_idxss                       r.   sample_framesz!Glm4vVideoProcessor.sample_framesg   s   
 {{1~HeS1	h(:LI#a8Z6]Y67!;Ht(((DJJx$((234Achijckl^_SDIIa)mdhh>V4W0XYlMld//$((:;Kk) $U;%7 8!#Q+PT!U\j kWX]C		!i-8P4Q!R k kUBd  	!C$C 	!
 t9q=KKR!m,<IJSCi0JJ&ss+k))- m !l Ks   ?G/12G4G9Ngp?videosvideo_metadata	do_resizeinterpolation
do_rescalerescale_factordo_normalizedo_sample_framesr!   r"   r   r   r    return_tensorsc                 4   g }|rp|t        |t              r|d   t        d      g }t        ||      D ]<  \  }}| j	                  ||      \  }}|j                  |       |j                  |       > n>|}|D cg c]'  }t        t        |            D cg c]  }|dz  	 c}) }}}|d d d   }t        |      \  }}i }|j                         D ]  \  }}|j                  \  }}}}}|||}"}!} |rot        | |!|"|||z  | j                  d         \  }#}$|j                  ||z  |||      }| j                  |t        |#|$      |      }|j                  ||||#|$      }|||<    t!        ||      }%t        |%      \  }}i }&i }'|j                         D ]  \  }}t#        |d   t$        j&                  	      \  }#}$| j)                  |||||	|
      }|}(|(j                  d
   |z  dk7  r:|(d d dd f   j+                  d
|d
z
  d
d
d
      })t-        j.                  |(|)gd
      }(|(j                  d d \  }*}+},|+|z  }+|#|z  |$|z  }.}-|(j                  |*|+||,|-|z  |||.|z  ||
      }(|(j1                  dd
dddddddd
      }(|(j3                  |*|+|-z  |.z  |,|z  |z  |z        }/|/|&|<   |+|-|.gg|*z  |'|<     t!        |&|      }t!        |'|      }'t-        j.                  |d      }0t-        j4                  |'      }1|0|1|d}2t7        |2|      S c c}w c c}}w )Nr   zFrame sampling is enabled but no video metadata was found. Please pass in `VideoMetadata` object per each input video or set `do_sample_frames=False`   r6   r4   )
num_framesheightwidthtemporal_factorfactor
max_pixels)rt   ru   )sizerk   )channel_dimr   rG   )dimr                  	   )r8   r9   
timestamps)datatensor_type)
isinstancer*   
ValueErrorziprg   rU   rN   rV   r   itemsrH   r   r   viewresizer
   r   r   r	   FIRSTrescale_and_normalizerepeattorchcatpermutereshapetensorr   )3r>   rh   ri   rj   rk   rl   rm   rn   ro   r!   r"   r   r   r    rp   r:   timestamps_listprocessed_videosr@   rA   r   rc   grouped_videosgrouped_videos_indexresized_videos_groupedrH   stacked_videosBTCHWrs   rt   ru   resized_heightresized_widthresized_videosprocessed_videos_groupedprocessed_gridspatchesrepeats
batch_sizegrid_tchannelgrid_hgrid_wflatten_patchesr8   r9   r   s3                                                      r.   _preprocesszGlm4vVideoProcessor._preprocess   s   $ %*^T*J~^_O`Oh q   "#&v~#> /x$($6$6uh$G!z&&z2 ''./  &U[\EU3u:5FGcr	G\O\-cc2O/DEU/V,,!#%3%9%9%; 	;!E>*00MAq!Q()1aJ0<)!$7%
2#22>B1- "0!4!4QUAq!!D!%"!}M"/ "- "
 "0!4!4Q1nm!\,:"5)'	;( ((>@TU 0E^/T,,#% %3%9%9%; %	M!E>,:>!;LZjZpZp,q)NM "77
NL*V_N %G }}Q"55:!!RS&/004G!4KQPQSTU))Wg$6A>*1--*;'J22F+z9=J;VFFll#*$*$G ooaAq!Q1aCG%oo&(--
:ZGO />$U+'-vv&>%?*%LOE"K%	MN **BDXY(:NO#ii(8a@o6#6,)
 >BBe  H\s   <LL#LL)+r#   r$   r%   r   BICUBICresamplery   r   r   r!   r   r"   rj   rl   rn   do_convert_rgbro   r   r   rK   r    r   valid_kwargsrs   rC   model_input_namesr   r=   r   Tensorr   r   r&   rg   r*   r   boolr+   r(   r'   r   r   __classcell__)r?   s   @r.   r2   r2   B   s    "))H&8KLD$&9:N!JIIJLNJLJ0LJ
C.0@A#(E!F #%*||%* t+,%*T LP,>,F,F )!!%:>9=$(-1$(;?sCU\\"sC !tM':DJ'F!GHsC 	sC
 *sC sC sC sC sC U5$u+#567sC E%e"456sC SMsC &c]sC SMsC !sJ!78sCr-   r2   )'__doc__rL   typingr   r   numpyrQ   image_processing_utilsr   image_utilsr   r   r	   r
   r   processing_utilsr   r   utilsr   r   r   r   image_processing_glm4vr   r   utils.import_utilsr   video_processing_utilsr   r   video_utilsr   r   r   r   r   r2   __all__r,   r-   r.   <module>r      s    *  "   5  1  * P O 1,L , l" 
#$rC, rC %rCj !
!r-   