from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable

from sentence_transformers.backend import load_onnx_model, load_openvino_model

try:
    from typing import Self
except ImportError:
    from typing_extensions import Self

import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer, MT5Config, PretrainedConfig, T5Config
from transformers.utils.import_utils import is_peft_available
from transformers.utils.peft_utils import find_adapter_config_file

from sentence_transformers.models.InputModule import InputModule

logger = logging.getLogger(__name__)

if TYPE_CHECKING and is_peft_available():
    from peft import PeftConfig


def _save_pretrained_wrapper(_save_pretrained_fn: Callable, subfolder: str) -> Callable[..., None]:
    # Wrap `save_pretrained` so that all artifacts land in `subfolder` below the save directory
    def wrapper(save_directory: str | Path, **kwargs) -> None:
        os.makedirs(Path(save_directory) / subfolder, exist_ok=True)
        return _save_pretrained_fn(Path(save_directory) / subfolder, **kwargs)

    return wrapper


class Transformer(InputModule):
    """Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
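
    Example:
        A minimal usage sketch (illustrative only; the checkpoint name below is a
        placeholder for any Hugging Face encoder model)::

            from sentence_transformers.models import Transformer

            module = Transformer("bert-base-uncased", max_seq_length=256)  # placeholder checkpoint
            features = module.tokenize(["This is a test sentence"])
            features = module(features)
            print(features["token_embeddings"].shape)  # (batch_size, seq_len, hidden_dim)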
    """

    config_file_name: str = "sentence_bert_config.json"
    config_keys: list[str] = ["max_seq_length", "do_lower_case"]
    save_in_root: bool = True

    def __init__(
        self,
        model_name_or_path: str,
        max_seq_length: int | None = None,
        model_args: dict[str, Any] | None = None,
        tokenizer_args: dict[str, Any] | None = None,
        config_args: dict[str, Any] | None = None,
        cache_dir: str | None = None,
        do_lower_case: bool = False,
        tokenizer_name_or_path: str | None = None,
        backend: str = "torch",
    ) -> None:
        super().__init__()
        self.do_lower_case = do_lower_case
        self.backend = backend
        if model_args is None:
            model_args = {}
        if tokenizer_args is None:
            tokenizer_args = {}
        if config_args is None:
            config_args = {}

        config, is_peft_model = self._load_config(model_name_or_path, cache_dir, backend, config_args)
        self._load_model(model_name_or_path, config, cache_dir, backend, is_peft_model, **model_args)

        if max_seq_length is not None and "model_max_length" not in tokenizer_args:
            tokenizer_args["model_max_length"] = max_seq_length
        self.tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_args,
        )

        # No max_seq_length set. Try to infer it from the model and tokenizer
        if max_seq_length is None:
            if (
                hasattr(self.auto_model, "config")
                and hasattr(self.auto_model.config, "max_position_embeddings")
                and hasattr(self.tokenizer, "model_max_length")
            ):
                max_seq_length = min(self.auto_model.config.max_position_embeddings, self.tokenizer.model_max_length)

        self.max_seq_length = max_seq_length

        if tokenizer_name_or_path is not None:
            self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__

    def _load_config(
        self, model_name_or_path: str, cache_dir: str | None, backend: str, config_args: dict[str, Any]
    ) -> tuple[PeftConfig | PretrainedConfig, bool]:
        """Loads the transformers or PEFT configuration

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            config_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers config.

        Returns:
            tuple[PretrainedConfig, bool]: The model configuration and a boolean indicating whether the model is a PEFT model.
        """
        if (
            find_adapter_config_file(
                model_name_or_path,
                cache_dir=cache_dir,
                token=config_args.get("token"),
                revision=config_args.get("revision"),
                local_files_only=config_args.get("local_files_only", False),
            )
            is not None
        ):
            if not is_peft_available():
                raise Exception(
                    "Loading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`."
                )
            if backend != "torch":
                raise ValueError(
                    "PEFT models can currently only be loaded with the `torch` backend. "
                    'To use other backends, load the model with `backend="torch"`, call `model.transformers_model.merge_and_unload()`, '
                    "save that model with `model.save_pretrained()` and then load the model with the desired backend."
                )
            from peft import PeftConfig

            return PeftConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir), True

        return AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir), False

    def _load_model(
        self,
        model_name_or_path: str,
        config: PeftConfig | PretrainedConfig,
        cache_dir: str,
        backend: str,
        is_peft_model: bool,
        **model_args,
    ) -> None:
        """Loads the transformers or PEFT model into the `auto_model` attribute

        Args:
            model_name_or_path (str): The model name on Hugging Face (e.g. 'sentence-transformers/all-MiniLM-L6-v2')
                or the path to a local model directory.
            config ("PeftConfig" | PretrainedConfig): The model configuration.
            cache_dir (str | None): The cache directory to store the model configuration.
            backend (str): The backend used for model inference. Can be `torch`, `onnx`, or `openvino`.
            is_peft_model (bool): Whether the model is a PEFT model.
            model_args (dict[str, Any]): Keyword arguments passed to the Hugging Face Transformers model.
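
        Raises:
            ValueError: If `backend` is not one of `torch`, `onnx`, or `openvino`.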
        """
        if backend == "torch":
            # Adapter-only kwargs must not be forwarded to the base model when loading a PEFT model
            if is_peft_model:
                for adapter_only_kwarg in ["revision"]:
                    model_args.pop(adapter_only_kwarg, None)

            if isinstance(config, T5Config):
                self._load_t5_model(model_name_or_path, config, cache_dir, **model_args)
            elif isinstance(config, MT5Config):
                self._load_mt5_model(model_name_or_path, config, cache_dir, **model_args)
            else:
                self.auto_model = AutoModel.from_pretrained(
                    model_name_or_path, config=config, cache_dir=cache_dir, **model_args
                )
        elif backend == "onnx":
            self.auto_model = load_onnx_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        elif backend == "openvino":
            self.auto_model = load_openvino_model(
                model_name_or_path=model_name_or_path,
                config=config,
                task_name="feature-extraction",
                **model_args,
            )
        else:
            raise ValueError(f"Unsupported backend '{backend}'. `backend` should be `torch`, `onnx`, or `openvino`.")

    def _load_t5_model(self, model_name_or_path: str, config: T5Config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from T5"""
        from transformers import T5EncoderModel

        T5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = T5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def _load_mt5_model(self, model_name_or_path: str, config: MT5Config, cache_dir: str, **model_args) -> None:
        """Loads the encoder model from mT5"""
        from transformers import MT5EncoderModel

        MT5EncoderModel._keys_to_ignore_on_load_unexpected = ["decoder.*"]
        self.auto_model = MT5EncoderModel.from_pretrained(
            model_name_or_path, config=config, cache_dir=cache_dir, **model_args
        )

    def __repr__(self) -> str:
        return f"Transformer({dict(self.get_config_dict(), architecture=self.auto_model.__class__.__name__)})"

    def forward(self, features: dict[str, torch.Tensor], **kwargs) -> dict[str, torch.Tensor]:
        """Returns token_embeddings, cls_token"""
        trans_features = {
            key: value
            for key, value in features.items()
            if key in ["input_ids", "attention_mask", "token_type_ids", "inputs_embeds"]
        }

        outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
        token_embeddings = outputs[0]
        features["token_embeddings"] = token_embeddings

        # If the AutoModel is wrapped with a PeftModelForFeatureExtraction, then it may have added virtual tokens.
        # We need to extend the attention mask to include these virtual tokens, or the pooling will fail.
        if is_peft_available():
            from peft import PeftModelForFeatureExtraction

            if (
                isinstance(self.auto_model, PeftModelForFeatureExtraction)
                and self.auto_model.active_peft_config.is_prompt_learning
            ):
                batch_size = token_embeddings.size(0)
                attention_mask = features["attention_mask"]
                prefix_attention_mask = torch.ones(
                    batch_size, self.auto_model.active_peft_config.num_virtual_tokens, device=attention_mask.device
                )
                features["attention_mask"] = torch.cat((prefix_attention_mask, attention_mask), dim=1)

        if self.auto_model.config.output_hidden_states and "hidden_states" in outputs:
            features["all_layer_embeddings"] = outputs["hidden_states"]

        return features

    def get_word_embedding_dimension(self) -> int:
        return self.auto_model.config.hidden_size

    def tokenize(
        self, texts: list[str] | list[dict] | list[tuple[str, str]], padding: str | bool = True
    ) -> dict[str, torch.Tensor]:
        """Tokenizes a text and maps tokens to token-ids"""
        output = {}
        if isinstance(texts[0], str):
            to_tokenize = [texts]
        elif isinstance(texts[0], dict):
            to_tokenize = []
            output["text_keys"] = []
            for lookup in texts:
                text_key, text = next(iter(lookup.items()))
                to_tokenize.append(text)
                output["text_keys"].append(text_key)
            to_tokenize = [to_tokenize]
        else:
            batch1, batch2 = [], []
            for text_tuple in texts:
                batch1.append(text_tuple[0])
                batch2.append(text_tuple[1])
            to_tokenize = [batch1, batch2]

        # Strip
        to_tokenize = [[str(s).strip() for s in col] for col in to_tokenize]

        # Lowercase
        if self.do_lower_case:
            to_tokenize = [[s.lower() for s in col] for col in to_tokenize]

        output.update(
            self.tokenizer(
                *to_tokenize,
                padding=padding,
                truncation="longest_first",
                return_tensors="pt",
                max_length=self.max_seq_length,
            )
        )
        return output

    def save(self, output_path: str, safe_serialization: bool = True, **kwargs) -> None:
        self.auto_model.save_pretrained(output_path, safe_serialization=safe_serialization)
        self.tokenizer.save_pretrained(output_path)
        self.save_config(output_path)

    @classmethod
    def load(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
        trust_remote_code: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
        backend: str = "torch",
        **kwargs,
    ) -> Self:
        init_kwargs = cls._load_init_kwargs(
            model_name_or_path=model_name_or_path,
            subfolder=subfolder,
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
            trust_remote_code=trust_remote_code,
            model_kwargs=model_kwargs,
            tokenizer_kwargs=tokenizer_kwargs,
            config_kwargs=config_kwargs,
            backend=backend,
        )
        return cls(model_name_or_path=model_name_or_path, **init_kwargs)

    @classmethod
    def _load_init_kwargs(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
        trust_remote_code: bool = False,
        model_kwargs: dict[str, Any] | None = None,
        tokenizer_kwargs: dict[str, Any] | None = None,
        config_kwargs: dict[str, Any] | None = None,
        backend: str = "torch",
        **kwargs,
    ) -> dict[str, Any]:
        config = cls.load_config(
            model_name_or_path=model_name_or_path,
            subfolder=subfolder,
            token=token,
            cache_folder=cache_folder,
            revision=revision,
            local_files_only=local_files_only,
        )

        hub_kwargs = {
            "subfolder": subfolder,
            "token": token,
            "revision": revision,
            "local_files_only": local_files_only,
            "trust_remote_code": trust_remote_code,
        }

        # 3rd priority: config file
        if "model_args" not in config:
            config["model_args"] = {}
        if "tokenizer_args" not in config:
            config["tokenizer_args"] = {}
        if "config_args" not in config:
            config["config_args"] = {}

        # 2nd priority: hub_kwargs
        config["model_args"].update(hub_kwargs)
        config["tokenizer_args"].update(hub_kwargs)
        config["config_args"].update(hub_kwargs)

        # 1st priority: kwargs passed to the module
        if model_kwargs:
            config["model_args"].update(model_kwargs)
        if tokenizer_kwargs:
            config["tokenizer_args"].update(tokenizer_kwargs)
        if config_kwargs:
            config["config_args"].update(config_kwargs)

        return {**config, "cache_dir": cache_folder, "backend": backend}

    @classmethod
    def load_config(
        cls,
        model_name_or_path: str,
        subfolder: str = "",
        config_filename: str | None = None,
        token: bool | str | None = None,
        cache_folder: str | None = None,
        revision: str | None = None,
        local_files_only: bool = False,
    ) -> dict[str, Any]:
        config_filenames = (
            [config_filename]
            if config_filename
            else [
                "sentence_bert_config.json",
                "sentence_roberta_config.json",
                "sentence_distilbert_config.json",
                "sentence_camembert_config.json",
                "sentence_albert_config.json",
                "sentence_xlm-roberta_config.json",
                "sentence_xlnet_config.json",
            ]
        )
        for config_filename in config_filenames:
            config = super().load_config(
                model_name_or_path=model_name_or_path,
                subfolder=subfolder,
                config_filename=config_filename,
                token=token,
                cache_folder=cache_folder,
                revision=revision,
                local_files_only=local_files_only,
            )
            if config:
                break

        # Don't allow configs to set trust_remote_code
        if "model_args" in config and "trust_remote_code" in config["model_args"]:
            config["model_args"].pop("trust_remote_code")
        if "tokenizer_args" in config and "trust_remote_code" in config["tokenizer_args"]:
            config["tokenizer_args"].pop("trust_remote_code")
        if "config_args" in config and "trust_remote_code" in config["config_args"]:
            config["config_args"].pop("trust_remote_code")

        return config