
    rh%                         d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZ dd	lmZ d
dlmZ  ej(                  e      Z G d de	d      Z G d de
      ZdgZy)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)Union   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       *    e Zd Zdddddddddd	i dZy)InstructBlipProcessorKwargsTFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargsN)__name__
__module____qualname__	_defaults     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   !   s0     #').*/&+%*"

 Ir"   r   F)totalc            
            e Zd ZdZg dZdZdZdZd fd	Z	 	 	 	 dde	de
eeee   ee   f   dee   d	efd
Zd Zd Zed        Z fdZe fd       Z xZS )InstructBlipProcessora  
    Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
    processor.

    [`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
    docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

    Args:
        image_processor (`BlipImageProcessor`):
            An instance of [`BlipImageProcessor`]. The image processor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):"
            Number of tokens used by the Qformer as queries, should be same as in model's config.
    )image_processor	tokenizerqformer_tokenizer)BlipImageProcessorBlipImageProcessorFastr   c                     t        |d      s2t        ddd      | _        |j                  | j                  gd       n|j                  | _        || _        t
        |   |||       y )Nimage_tokenz<image>FT)
normalizedspecial)special_tokens)hasattrr
   r-   
add_tokensnum_query_tokenssuper__init__)selfr'   r(   r)   r3   kwargs	__class__s         r#   r5   zInstructBlipProcessor.__init__J   sb    y-0))tTD  $"2"2!3D I(44D 0)5FGr"   imagestextr7   returnc                    ||t        d       | j                  t        fd| j                  j                  i|}|d   j                  dd      }i }|Kt        |t              r|g}n.t        |t              st        |d   t              st        d       | j                  |fi |d   }	|	j                  d      |d	<   |	j                  d
      |d<   |d   j                  d      |d   dxx   | j                  z  cc<    | j                  |fi |d   }
|t| j                  j                  | j                  z  }d|d   d<   d|d   d<   d|d   d<    | j                  |fi |d   }|
D ]  }|
|   D cg c]
  }||   |z    c}|
|<     |j                  |
       |' | j                  |fi |d   }|j                  |       t!        ||      }|S c c}w )a  
        This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
        [`BertTokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Args:
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.
            text (`TextInput`, `PreTokenizedInput`, `list[TextInput]`, `list[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
        Nz,You have to specify at least images or text.tokenizer_init_kwargsr   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings	input_idsqformer_input_idsattention_maskqformer_attention_mask
max_lengthFr   r   
truncationr   )tensor_type)
ValueError_merge_kwargsr   r(   init_kwargspop
isinstancestrlistr)   getr3   r-   contentupdater'   r   )r6   r9   r:   audiovideosr7   output_kwargsr>   encodingqformer_text_encodingtext_encodingimage_tokensimage_text_encodingksampleimage_encodings                   r#   __call__zInstructBlipProcessor.__call__T   s1   , >dlKLL***'
"&.."<"<
 
 '}599:JDQ$$vd+JtAw4L !dee$:D$:$:4$`=Q^C_$`!,A,E,Ek,RH()1F1J1JK[1\H-. ]+//=Im,\:d>S>SS:*DNN4P=3OPM!#//77$:O:OOEJm,-AB:?m,Y7=Bm,\:&4dnn\&b]S`Ea&b#& hAVcdeVf'gF(;A(>(G'gM!$hOOM*1T11&[M/<Z[NOON+  nE (hs   :Gc                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r(   batch_decoder6   argsr7   s      r#   r]   z"InstructBlipProcessor.batch_decode   s     
 +t~~**D;F;;r"   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r(   decoder^   s      r#   ra   zInstructBlipProcessor.decode   s     
 %t~~$$d5f55r"   c                     | j                   j                  }| j                  j                  }t        t        j                  ||z               S N)r(   model_input_namesr'   rL   dictfromkeys)r6   tokenizer_input_namesimage_processor_input_namess      r#   rd   z'InstructBlipProcessor.model_input_names   sA     !% @ @&*&:&:&L&L#DMM"7:U"UVWWr"   c                    t         j                  j                  |      rt        d| d      t        j                  |d       t         j                  j                  |d      }| j                  j                  |       d| j                  v }|r| j                  j                  d       t        |   |fi |}|r| xj                  dgz  c_        |S )NzProvided path (z#) should be a directory, not a fileT)exist_okr)   )ospathisfilerF   makedirsjoinr)   save_pretrained
attributesremover4   )r6   save_directoryr7   qformer_tokenizer_pathqformer_presentoutputsr8   s         r#   rp   z%InstructBlipProcessor.save_pretrained   s    77>>.)~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or"   c                     t        |   |fi |}t        |t              r|d   }t	        j                  |d      }||_        |S )Nr   r)   )	subfolder)r4   from_pretrainedrJ   tupler   r)   )clspretrained_model_name_or_pathr7   	processorr)   r8   s        r#   ry   z%InstructBlipProcessor.from_pretrained   sP    G+,ITVT	 i'!!I)99:Wcvw&7	#r"   rc   )NNNN)r   r   r   __doc__rq   image_processor_classtokenizer_classqformer_tokenizer_classr5   r   r   r   r   rL   r	   r   r   r[   r]   ra   propertyrd   rp   classmethodry   __classcell__)r8   s   @r#   r&   r&   2   s    $ GJL%O-H "^bAA I0$y/4HYCZZ[A 45A 
AH<6 X X&  r"   r&   )r~   rk   typingr   image_processing_utilsr   image_utilsr   processing_utilsr   r   r	   tokenization_utils_baser
   r   r   utilsr   autor   
get_loggerr   loggerr   r&   __all__r!   r"   r#   <module>r      sf    
  2 % H H O O    
		H	%"2% "YN Yx #
#r"   