
from __future__ import annotations

import logging

import torch
from torch import Tensor, nn

from sentence_transformers.models.Module import Module

logger = logging.getLogger(__name__)


class WordWeights(Module):
    """This model can weight word embeddings, for example, with idf-values."""

    config_keys: list[str] = ["vocab", "word_weights", "unknown_word_weight"]

    def __init__(self, vocab: list[str], word_weights: dict[str, float], unknown_word_weight: float = 1):
        """
        Initializes the WordWeights class.

        Args:
            vocab (List[str]): Vocabulary of the tokenizer.
            word_weights (Dict[str, float]): Mapping of tokens to a float weight value. Word embeddings are multiplied
                by this float value. Tokens in word_weights need not match the vocab exactly (it can contain more or
                fewer entries).
            unknown_word_weight (float, optional): Weight for words in vocab that do not appear in the word_weights lookup.
                These can be, for example, rare words in the vocab where no weight exists. Defaults to 1.
        """
        super().__init__()
        self.vocab = vocab
        self.word_weights = word_weights
        self.unknown_word_weight = unknown_word_weight

        # Resolve a weight for every vocab entry: exact match first, then the
        # lowercased token, then the fallback unknown_word_weight.
        weights = []
        num_unknown_words = 0
        for word in vocab:
            weight = unknown_word_weight
            if word in word_weights:
                weight = word_weights[word]
            elif word.lower() in word_weights:
                weight = word_weights[word.lower()]
            else:
                num_unknown_words += 1
            weights.append(weight)

        logger.info(
            f"{num_unknown_words} of {len(vocab)} words without a weighting value. Set weight to {unknown_word_weight}"
        )

        # One scalar weight per vocab entry, stored in an embedding table so it can be
        # gathered by input_ids in forward().
        self.emb_layer = nn.Embedding(len(vocab), 1)
        self.emb_layer.load_state_dict({"weight": torch.FloatTensor(weights).unsqueeze(1)})

    def forward(self, features: dict[str, Tensor]) -> dict[str, Tensor]:
        attention_mask = features["attention_mask"]
        token_embeddings = features["token_embeddings"]

        # Look up one weight per token and zero out padding positions
        token_weights_raw = self.emb_layer(features["input_ids"]).squeeze(-1)
        token_weights = token_weights_raw * attention_mask.float()
        token_weights_sum = torch.sum(token_weights, 1)

        # Scale each token embedding by its weight
        token_weights_expanded = token_weights.unsqueeze(-1).expand(token_embeddings.size())
        token_embeddings = token_embeddings * token_weights_expanded

        features.update({"token_embeddings": token_embeddings, "token_weights_sum": token_weights_sum})
        return features

    def save(self, output_path: str, *args, safe_serialization: bool = True, **kwargs) -> None:
        self.save_config(output_path)
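

# A minimal usage sketch: a toy vocabulary, hand-picked weights, and random tensors
# stand in for a real tokenizer/transformer pipeline. The names below (vocab,
# word_weights, features values) are illustrative only; in practice WordWeights sits
# between a Transformer and a Pooling module inside a SentenceTransformer.
if __name__ == "__main__":
    vocab = ["[PAD]", "the", "movie", "great"]
    word_weights = {"the": 0.1, "movie": 1.5, "great": 2.0}  # e.g. idf scores
    word_weights_layer = WordWeights(vocab=vocab, word_weights=word_weights, unknown_word_weight=1.0)

    features = {
        "input_ids": torch.tensor([[1, 2, 3, 0]]),  # "the movie great [PAD]"
        "attention_mask": torch.tensor([[1, 1, 1, 0]]),
        "token_embeddings": torch.randn(1, 4, 8),  # (batch, seq_len, embedding_dim)
    }
    out = word_weights_layer(features)
    # Each token embedding is now scaled by its weight, the padding token contributes
    # nothing, and "token_weights_sum" (here 0.1 + 1.5 + 2.0 = 3.6) can serve as the
    # divisor for a weighted-mean pooling step downstream.
    print(out["token_embeddings"].shape, out["token_weights_sum"])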