Ë
    ÂrœhX  ã                  óŠ   — d dl mZ d dlZd dlmZ d dlZd dlmZ d dlmZ ddl	m
Z
  ej                  e«      Z G d„ d	e«      Zy)
é    )ÚannotationsN)ÚLiteral)ÚTensor)ÚInputModuleé   )ÚWhitespaceTokenizerc                  ó–   ‡ — e Zd ZU dZdZded<   g d¢Zded<   i dd	f	 	 	 	 	 	 	 dˆ fd
„Zdd„Zdd„Z	d„ Z
	 d	 	 	 	 	 dd„Zd	dœdd„Zˆ xZS )ÚBoWz¿Implements a Bag-of-Words (BoW) model to derive sentence embeddings.

    A weighting can be added to allow the generation of tf-idf vectors. The output vector has the size of the vocab.
    FÚboolÚsave_in_root)ÚvocabÚword_weightsÚunknown_word_weightÚcumulative_term_frequencyú	list[str]Úconfig_keysr   Tc                óü  •— t         ‰|   «        t        t        j	                  |«      «      }|| _        || _        || _        || _        g | _	        d}|D ]T  }|}||v r||   }n+|j                  «       |v r||j                  «          }n|dz  }| j                  j                  |«       ŒV t        j                  |› dt        |«      › d|› «       t        |t!        «       d¬«      | _        t        |«      | _        y )Nr   r   z out of z0 words without a weighting value. Set weight to F)Ú
stop_wordsÚdo_lower_case)ÚsuperÚ__init__ÚlistÚdictÚfromkeysr   r   r   r   ÚweightsÚlowerÚappendÚloggerÚinfoÚlenr   ÚsetÚ	tokenizerÚsentence_embedding_dimension)	Úselfr   r   r   r   Únum_unknown_wordsÚwordÚweightÚ	__class__s	           €ús/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/models/BoW.pyr   zBoW.__init__   sý   ø€ ô 	‰ÑÔÜ”T—]‘] 5Ó)Ó*ˆØˆŒ
Ø(ˆÔØ#6ˆÔ Ø)BˆÔ&ð ˆŒØÐØò 	(ˆDØ(ˆFØ|Ñ#Ø% dÑ+‘Ø—‘“ Ñ-Ø% d§j¡j£lÑ3‘à! QÑ&Ð!ØL‰L×Ñ Õ'ð	(ô 	‰Ø Ð! ¬#¨e«*¨Ð5eÐfyÐezÐ{ô	
ô -¨U¼s»uÐTYÔZˆŒÜ,/°«JˆÕ)ó    c                ó   — |S ©N© )r$   Úfeaturess     r)   ÚforwardzBoW.forward;   s   € àˆr*   c                ó‚   — |D cg c]  } | j                   j                  |fi |¤Ž‘Œ! }}| j                  |«      S c c}w r,   )r"   ÚtokenizeÚget_sentence_features)r$   ÚtextsÚkwargsÚtextÚ	tokenizeds        r)   r1   zBoW.tokenize?   sC   € ØINÖOÀÐ,T—^‘^×,Ñ,¨TÑ<°VÓ<ÐOˆ	ÐOØ×)Ñ)¨)Ó4Ð4ùò Ps   …$<c                ó   — | j                   S r,   )r#   )r$   s    r)   Ú get_sentence_embedding_dimensionz$BoW.get_sentence_embedding_dimensionC   s   € Ø×0Ñ0Ð0r*   c                óJ  — g }|D ]†  }t        j                  | j                  «       t         j                  ¬«      }|D ];  }| j                  r||xx   | j
                  |   z  cc<   Œ*| j
                  |   ||<   Œ= |j                  |«       Œˆ dt        j                  |«      iS )N)ÚdtypeÚsentence_embedding)ÚtorchÚzerosr8   Úfloat32r   r   r   Ústack)r$   Útokenized_textsÚpad_seq_lengthÚvectorsÚtokensÚvectorÚtokens          r)   r2   zBoW.get_sentence_featuresF   s   € ð ˆà%ò 	#ˆFÜ—[‘[ ×!FÑ!FÓ!HÔPU×P]ÑP]Ô^ˆFØò 8Ø×1Ò1Ø˜5“M T§\¡\°%Ñ%8Ñ8”Mà$(§L¡L°Ñ$7F˜5’Mð	8ð
 N‰N˜6Õ"ð	#ð %¤e§k¡k°'Ó&:Ð;Ð;r*   )Úsafe_serializationc               ó&   — | j                  |«       y r,   )Úsave_config)r$   Úoutput_pathrF   Úargsr4   s        r)   ÚsavezBoW.saveV   s   € Ø×Ñ˜Õ%r*   )r   r   r   zdict[str, float]r   Úfloatr   r   )r.   zdict[str, Tensor])r3   r   Úreturnz	list[int])r   )r@   zlist[list[int]]rA   ÚintrM   z1dict[Literal['sentence_embedding'], torch.Tensor])rI   ÚstrrF   r   rM   ÚNone)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Ú__annotations__r   r   r/   r1   r8   r2   rK   Ú__classcell__)r(   s   @r)   r
   r
      s™   ø… ñð
 €L$ÓÚj€KÓjð
 *,Ø%&Ø*.ð 7àð 7ð 'ð 7ð #ð	 7ð
 $(õ 7óDó5ò1ð GHð<Ø.ð<Ø@Cð<à	:ó<ð  HL÷ &ñ &r*   r
   )Ú
__future__r   ÚloggingÚtypingr   r<   r   Ú(sentence_transformers.models.InputModuler   r"   r   Ú	getLoggerrQ   r   r
   r-   r*   r)   ú<module>r\      s:   ðÝ "ã Ý ã Ý å @å *à	ˆ×	Ñ	˜8Ó	$€ôG&ˆ+õ G&r*   