
    rh                          d dl mZmZ d dlZddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZ ddlmZmZ  G d d	e      Z G d
 ded      Z G d de      ZdgZy)    )OptionalUnionN   )BatchFeature)
ImageInputmake_nested_list_of_images)AudioKwargsImagesKwargsProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInputc                   ^    e Zd ZU ee   ed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)Gemma3nImagesKwargsdo_pan_and_scanpan_and_scan_min_crop_sizepan_and_scan_max_num_crops"pan_and_scan_min_ratio_to_activatedo_convert_rgbN)__name__
__module____qualname__r   bool__annotations__intfloat     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/gemma3n/processing_gemma3n.pyr   r      s4    d^# (- (-(07TN"r   r   c                   .    e Zd ZU eed<   eed<   dddiiZy)Gemma3nProcessorKwargsaudio_kwargsimages_kwargstext_kwargspaddingFN)r   r   r   r	   r   r   	_defaultsr   r   r    r"   r"   "   s"    &&u
Ir   r"   F)totalc                        e Zd ZdZg dZdZdZdZ	 	 	 ddedef fdZ		 	 	 	 dd	e
d
eeeee   ee   f   deeej"                  ee   eej"                     eee      f      dee   def
dZd Zd Zed        Z xZS )Gemma3nProcessorat  
    A processor for Gemma 3n, wrapping the full capabilities of a feature extractor, image processor, and tokenizer
    into a single processor.

    Args:
        feature_extractor (`Gemma3nAudioFeatureExtractor`):
            Feature extractor that converts raw audio waveforms into MEL spectrograms for the audio encoder. This
            should return a `BatchFeature` with `input_features` and `input_features_mask` features.
        image_processor (`SiglipImageProcessorFast`):
            Image processor that prepares batches of images for the vision encoder. This should return a `BatchFeature`
            with a `pixel_values` feature.
        tokenizer (`GemmaTokenizerFast`):
            The text tokenizer for the model.
        chat_template (`string`, *optional*):
            A Jinja template for generating text prompts from a set of messages.
        audio_seq_length (int, *optional*, defaults to 188):
            The number of audio soft tokens that will be added to the text prompt
        image_seq_length (int, *optional*, defaults to 256):
            The number of image soft tokens that should be added to
    )feature_extractorimage_processor	tokenizerAutoFeatureExtractorAutoImageProcessorAutoTokenizeraudio_seq_lengthimage_seq_lengthc                    || _         |j                  | _        |j                  | _        |j                  | _        dj	                  |j                  g|z        }d|j                   | |j
                   d| _        || _        |j                  | _        |j                  | _	        |j                  | _
        dj	                  |j                  g|z        }	d|j                   |	 |j                   d| _        t        
| 8  d||||d| y )N z

)r+   r,   r-   chat_templater   )r1   audio_token_id	boa_tokenaudio_tokenjoin	eoa_tokenfull_audio_sequencer2   image_token_id	boi_tokenimage_token	eoi_tokenfull_image_sequencesuper__init__)selfr+   r,   r-   r5   r1   r2   kwargsaudio_tokens_expandedimage_tokens_expanded	__class__s             r    rB   zGemma3nProcessor.__init__G   s!    !1'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  0'66",,$00 ")>)>(?BR(R S%))*=*=)>?T>UV_ViViUjjn#o  	
/+'		

 	
r   imagestextaudiorD   returnc           	      R   |||t        d       | j                  t        fd| j                  j                  i|}t        |t              r|g}n.t        |t              st        |d   t              st        d      |e | j                  |fi |d   }|s|D cg c]  }| j                   }}|D 	cg c](  }	|	j                  | j                  | j                        * }}	ni }|t        |      }
 | j                  |
fi |d   }|s5|
D cg c]*  }dj                  | j                  gt!        |      z        , }}t!        |
      t!        |      k7  r$t        dt!        |
       d	t!        |       d
      |D 	cg c](  }	|	j                  | j                  | j"                        * }}	ni }|d   j%                  dd       } | j                  dd|i|d   ddi}| j'                  ||dg       |d   }t)        j*                  |      }d||| j,                  k(  <   d||| j.                  k(  <   |j1                         D ci c]  \  }}||j3                          }}}|j3                         |d<   t5        i ||||      S c c}w c c}	w c c}w c c}	w c c}}w )Nz5Provide at least one of `text`, `images`, or `audio`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr#   r$    z1Received inconsistently sized batches of images (z) and text (z).r%   return_tensorsrI   npimage)
modalities	input_ids   r   token_type_ids)datatensor_typer   )
ValueError_merge_kwargsr"   r-   init_kwargs
isinstancestrlistr+   r8   replacer;   r   r,   r9   r>   lenr@   pop_check_special_mm_tokensrP   
zeros_liker<   r6   itemstolistr   )rC   rH   rI   rJ   videosrD   output_kwargsaudio_inputs_promptbatched_imagesimage_inputsrO   text_inputs	array_idsrU   kvs                     r    __call__zGemma3nProcessor.__call__g   s    <FNu}TUU***"
"&.."<"<
 
 dC 6DD$'
47C0H`aa1411%Y=;XYL278Q((88 ^bbSYFNN4#3#3T5M5MNbDbL7?N/4//a-P_B`aL Q_`v$"2"2!3c&k!AB``>"c$i/ GNH[G\\hilmqirhssuv 
 ^bbSYFNN4#3#3T5M5MNbDbL&}599:JDQ$dnnd$d-2Nd_cd%%dKWI%N  ,	y1;<yD$7$778;<yD$7$7781<1B1B1DEAq!((*}EE(6(=(=(?$%!PK!P<!P<!P^lmmI 9 c a c Fs   J8-J/J-JJ#c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r-   batch_decoderC   argsrD   s      r    rr   zGemma3nProcessor.batch_decode   s     
 +t~~**D;F;;r   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r-   decoders   s      r    rv   zGemma3nProcessor.decode   s     
 %t~~$$d5f55r   c                     | j                   j                  dgz   }| j                  j                  }| j                  j                  }t	        t
        j                  ||z   |z               S )NrU   )r-   model_input_namesr,   r+   r]   dictfromkeys)rC   tokenizer_input_namesimage_processor_input_namesfeature_extactor_input_namess       r    rx   z"Gemma3nProcessor.model_input_names   s`     $ @ @DTCU U&*&:&:&L&L#'+'='='O'O$DMM"7:U"UXt"tuvvr   )N      )NNNN)r   r   r   __doc__
attributesfeature_extractor_classimage_processor_classtokenizer_classr   rB   r   r   r   r   r]   r   rP   ndarrayr   r   r"   r   rp   rr   rv   propertyrx   __classcell__)rG   s   @r    r*   r*   ,   s    * GJ40%O  # #
 
 
D "^b_c>n>n I0$y/4HYCZZ[>n bjj$u+tBJJ7GdSXkIZZ[\	>n /0>n 
>nB<6 w wr   r*   )typingr   r   numpyrP   feature_extraction_utilsr   image_utilsr   r   processing_utilsr	   r
   r   r   r   tokenization_utils_baser   r   r   r"   r*   __all__r   r   r    <module>r      sR     #  4 A c c C#, #-U Pw~ Pwf 
r   