
from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator

if TYPE_CHECKING:
    import numpy as np

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class MSEEvaluator(SentenceEvaluator):
    """
    Computes the mean squared error (x100) between the computed sentence embedding
    and some target sentence embedding.

    The MSE is computed as the mean squared difference between teacher.encode(source_sentences) and student.encode(target_sentences), multiplied by 100.

    For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
    and target_sentences are in a different language like German, Chinese, Spanish...

    Args:
        source_sentences (List[str]): Source sentences to embed with the teacher model.
        target_sentences (List[str]): Target sentences to embed with the student model.
        teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
            dimension. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import MSEEvaluator
            from datasets import load_dataset

            # Load a model
            student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
            teacher_model = SentenceTransformer('all-mpnet-base-v2')

            # Load any dataset with some texts
            dataset = load_dataset("sentence-transformers/stsb", split="validation")
            sentences = dataset["sentence1"] + dataset["sentence2"]

            # Given source and target sentences, the MSEEvaluator computes the MSE between the teacher and student embeddings.
            mse_evaluator = MSEEvaluator(
                source_sentences=sentences,
                target_sentences=sentences,
                teacher_model=teacher_model,
                name="stsb-dev",
            )
            results = mse_evaluator(student_model)
            '''
            MSE evaluation (lower = better) on the stsb-dev dataset:
            MSE (*100):  0.805045
            '''
            print(mse_evaluator.primary_metric)
            # => "stsb-dev_negative_mse"
            print(results[mse_evaluator.primary_metric])
            # => -0.8050452917814255
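
            # A hedged note beyond the example above (assumption, not from the original docstring):
            # since MSEEvaluator subclasses SentenceEvaluator, it can also be re-run periodically
            # during distillation training, or passed via the `evaluator` argument of
            # SentenceTransformerTrainer so that the negative MSE is tracked during training.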
    """

    def __init__(
        self,
        source_sentences: list[str],
        target_sentences: list[str],
        teacher_model=None,
        show_progress_bar: bool = False,
        batch_size: int = 32,
        name: str = "",
        write_csv: bool = True,
        truncate_dim: int | None = None,
    ):
        super().__init__()
        self.truncate_dim = truncate_dim
        self.target_sentences = target_sentences
        self.show_progress_bar = show_progress_bar
        self.batch_size = batch_size
        self.name = name

        self.csv_file = "mse_evaluation_" + name + "_results.csv"
        self.csv_headers = ["epoch", "steps", "MSE"]
        self.write_csv = write_csv
        self.primary_metric = "negative_mse"

        # The teacher (source) embeddings only need to be computed once.
        self.source_embeddings = self.embed_inputs(teacher_model, source_sentences)

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> dict[str, float]:
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        target_embeddings = self.embed_inputs(model, self.target_sentences)

        mse = ((self.source_embeddings - target_embeddings) ** 2).mean()
        mse *= 100

        logger.info(f"MSE evaluation (lower = better) on the {self.name} dataset{out_txt}:")
        logger.info(f"MSE (*100):\t{mse:4f}")

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)

                writer.writerow([epoch, steps, mse])

        # Return the negative MSE, as SentenceTransformers treats higher metric values as better.
        metrics = {"negative_mse": -mse}
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        **kwargs,
    ) -> np.ndarray:
        return model.encode(
            sentences,
            show_progress_bar=self.show_progress_bar,
            batch_size=self.batch_size,
            convert_to_numpy=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    @property
    def description(self) -> str:
        return "Knowledge Distillation"

    def get_config_dict(self):
        config_dict = {}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict