
    rh)                         d Z ddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZmZ dd	lmZ d
dlmZ  ej,                  e      Z G d de      ZdgZy)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)OptionalUnion   )BatchFeature)ProcessorMixin)
AddedTokenPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypelogging)
VideoInput   )AutoTokenizerc            $       4    e Zd ZdZg dZdZdZdZd fd	Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dde	de
eeee   ee   f   ded	e
eeef   d
e
eeef   dee   dedee   dee   dededededededee
eef      def"dZd Zd Zed        Z fdZe fd       Z xZS )InstructBlipVideoProcessora  
    Constructs an InstructBLIPVideo processor which wraps a InstructBLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipVideoProcessor`] offers all the functionalities of [`InstructBlipVideoImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~InstructBlipVideoProcessor.__call__`] and [`~InstructBlipVideoProcessor.decode`] for more information.

    Args:
        video_processor (`InstructBlipVideoVideoProcessor`):
            An instance of [`InstructBlipVideoVideoProcessor`]. The video processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )video_processor	tokenizerqformer_tokenizerAutoVideoProcessorr   c                     t        |d      s2t        ddd      | _        |j                  | j                  gd       n|j                  | _        || _        t
        |   |||       y )Nvideo_tokenz<video>FT)
normalizedspecial)special_tokens)hasattrr   r   
add_tokensnum_query_tokenssuper__init__)selfr   r   r   r   kwargs	__class__s         /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/instructblipvideo/processing_instructblipvideo.pyr!   z#InstructBlipVideoProcessor.__init__?   sb    y-0))tTD  $"2"2!3D I(44D 0)5FG    imagestextadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbosereturn_tensorsreturnc                    ||t        d      i }|5t        |t              r|g}n.t        |t              st        |d   t              st        d       | j                  d||||||||	|
||||||d|}|j                  d      |d<   |j                  d      |d	<   ||| j                  z  } | j                  d||||||||	|
|||||dd|}|c| j                  j                  | j                  z  d
z  }| j                  |d|	|
||||d	      }|D ]  }||   D cg c]
  }||   |z    c}||<     |j                  |       |$| j                  ||      }|j                  |       t        ||      }|S c c}w )a%  
        This method uses [`InstructBlipVideoImageProcessor.__call__`] method to prepare image(s) or video(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Nz3You have to specify at least one of images or text.r   zAInvalid input text. Please provide a string, or a list of strings)r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   	input_idsqformer_input_idsattention_maskqformer_attention_mask   F)r)   r/   r0   r1   r2   r3   r4   r6   )r6   )tensor_type )
ValueError
isinstancestrlistr   popr   r   r   contentupdater   r   )r"   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r#   encodingqformer_text_encodingtext_encodingvideo_tokensvideo_text_encodingksampleimage_encodings                             r%   __call__z#InstructBlipVideoProcessor.__call__H   s   4 >dlRSS$$vd+JtAw4L !dee$:D$:$: %#5%%#5&;*C+E'=&;+-%  !%!$ -B,E,Ek,RH()1F1J1JK[1\H-. %d333
*DNN #5%%#5&;*C+E'=&;+#  !M& !#//77$:O:OORSS&*nn ',*?.G/I+A*?"/#' '5 
'# ' hAVcdeVf'gF(;A(>(G'gM!$hOOM*!11&1XNOON+nE (hs   "E?c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r   batch_decoder"   argsr#   s      r%   rQ   z'InstructBlipVideoProcessor.batch_decode   s     
 +t~~**D;F;;r&   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r   decoderR   s      r%   rU   z!InstructBlipVideoProcessor.decode   s     
 %t~~$$d5f55r&   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S N)r   model_input_namesimage_processorrC   dictfromkeys)r"   tokenizer_input_namesimage_processor_input_namess      r%   rX   z,InstructBlipVideoProcessor.model_input_names   sA     !% @ @&*&:&:&L&L#DMM"7:U"UVWWr&   c                    t         j                  j                  |      rt        d| d      t        j                  |d       t         j                  j                  |d      }| j                  j                  |       d| j                  v }|r| j                  j                  d       t        |   |fi |}|r| xj                  dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr   )ospathisfiler@   makedirsjoinr   save_pretrained
attributesremover    )r"   save_directoryr#   qformer_tokenizer_pathqformer_presentoutputsr$   s         r%   re   z*InstructBlipVideoProcessor.save_pretrained   s    77>>.)~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or&   c                     t        |   |fi |}t        |t              r|d   }t	        j                  |d      }||_        |S )Nr   r   )	subfolder)r    from_pretrainedrA   tupler   r   )clspretrained_model_name_or_pathr#   	processorr   r$   s        r%   rn   z*InstructBlipVideoProcessor.from_pretrained   sP    G+,ITVT	 i'!!I)99:Wcvw&7	#r&   rW   )NNTFNNr   NNFFFFFTN)__name__
__module____qualname____doc__rf   video_processor_classtokenizer_classqformer_tokenizer_classr!   r   r   r   r
   rC   boolrB   r	   r   r   intr   r   rO   rQ   rU   propertyrX   re   classmethodrn   __classcell__)r$   s   @r%   r   r   '   s   $ GJ0%O-H "^b#'5:;?$(,004*/+0',&+#;?#ff I0$y/4HYCZZ[f !	f
 tS/12f $%778f SMf f %SMf  (~f $(f %)f !%f  $f f  !f" !sJ!78#f& 
'fR<6 X X&  r&   r   )rv   r`   typingr   r   image_processing_utilsr   processing_utilsr   tokenization_utils_baser   r	   r
   r   r   utilsr   r   video_utilsr   autor   
get_loggerrs   loggerr   __all__r?   r&   r%   <module>r      sZ    
 " 2 .  ) %   
		H	%} }@ (
(r&   