
    rhU                        d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ d dlmZ erd dl	mZ d dlmZ  ej"                  e      Z G d d	e      Zy)
    )annotationsN)TYPE_CHECKING)SentenceEvaluator)pytorch_cos_sim)Tensor)SentenceTransformerc                       e Zd ZdZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 	 	 	 	 d	dZ	 	 	 	 	 	 d
dZd Z xZS )TranslationEvaluatora	  
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    The labels need to indicate the similarity between the sentences.

    Args:
        source_sentences (List[str]): List of sentences in the source language.
        target_sentences (List[str]): List of sentences in the target language.
        show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
        batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
        name (str): The name of the evaluator. Defaults to an empty string.
        print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
            current truncation dimension will be used. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
    c	                4   t         	|           || _        || _        || _        || _        || _        || _        || _        t        | j                        t        | j                        k(  sJ |rd|z   }d|z   dz   | _
        g d| _        || _        d| _        y )N_translation_evaluationz_results.csv)epochstepssrc2trgtrg2srcmean_accuracy)super__init__source_sentencestarget_sentencesname
batch_sizeshow_progress_barprint_wrong_matchestruncate_dimlencsv_filecsv_headers	write_csvprimary_metric)
selfr   r   r   r   r   r   r   r   	__class__s
            /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/TranslationEvaluator.pyr   zTranslationEvaluator.__init__H   s     	 0 0	$!2#6 (4(()S1F1F-GGGG:D047.HC"-    c           	     @   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }t        j                  d	| j                   d
| d       t	        j
                  | j                  || j                              }t	        j
                  | j                  || j                              }t        ||      j                         j                         j                         }d}	d}
t        t        |            D ]  }t        j                   ||         }||k(  r|	dz  }	&| j"                  s3t%        d|d|d|       t%        d| j                  |          t%        d| j                  |   d||   |   dd       t%        d| j                  |   d||   |   dd       t'        ||         }t)        |d d      }|d d D ]%  \  }}t%        d|d|dd| j                  |          '  |j*                  }t        t        |            D ]%  }t        j                   ||         }||k(  s!|
dz  }
' |	t        |      z  }|
t        |      z  }t        j                  d|dz  d       t        j                  d|dz  d       || j,                  rt.        j0                  j3                  || j4                        }t.        j0                  j7                  |      }t9        |d|rdnd d!"      5 }t;        j<                  |      }|s|j?                  | j@                         |j?                  ||||g       d d d        ||||z   d#z  d$}| jC                  || j                        }| jE                  ||||       |S # 1 sw Y   GxY w)%Nz after epoch z
 in epoch z after z steps z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:r      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                    | d   S )Nr*    )xs    r#   <lambda>z/TranslationEvaluator.__call__.<locals>.<lambda>   s
    ! r$   T)keyreverse   	zAccuracy src2trg: d   z.2fzAccuracy trg2src: awzutf-8)newlinemodeencoding   )src2trg_accuracytrg2src_accuracyr   )#r   loggerinfor   torchstackembed_inputsr   r   r   detachcpunumpyranger   npargmaxr   print	enumeratesortedTr   ospathjoinr   isfileopencsvwriterwriterowr   prefix_name_to_metrics store_metrics_in_model_card_data)r!   modeloutput_pathr   r   out_txtembeddings1embeddings2cos_simscorrect_src2trgcorrect_trg2srcimax_idxresultsidxscoreacc_src2trgacc_trg2srccsv_pathoutput_file_existsfrQ   metricss                          r#   __call__zTranslationEvaluator.__call__f   s    B;{)%1&ugWUG6BG():):(;1==GSTXT]T]S^^fgnfoopqrkk$"3"3E4;P;P"QRkk$"3"3E4;P;P"QR";<CCEIIKQQSs8}% 	ZAii,GG|1$))-q2MwXkmnond&;&;A&>?nd&;&;G&DQYZ[Q\]dQefiPjjkFlmnd&;&;A&>(8TU;WX>Z]J^^_@`a#HQK0 ndK")"1+ ZJC$xc{!%<d>S>STW>XYZ	Z  ::s8}% 	%Aii,GG|1$	%
 &H5%H5(s):3(?@A(s):3(?@A"t~~ww||K?H!#!9h8JPS^ef JjkA)OOD$4$45{K HIJ !, +)K71<

 --gtyyA--eWeUKJ Js   ANNc                n     |j                   |f| j                  | j                  d| j                  d|S )NF)r   r   convert_to_numpyr   )encoder   r   r   )r!   rU   	sentenceskwargss       r#   r@   z!TranslationEvaluator.embed_inputs   sE     u||
"44"**
 
 	
r$   c                @    i }| j                   | j                   |d<   |S )Nr   )r   )r!   config_dicts     r#   get_config_dictz$TranslationEvaluator.get_config_dict   s)    (*.*;*;K'r$   )F   r'   FTN)r   	list[str]r   rr   r   boolr   intr   strr   rs   r   rs   r   z
int | None)Nr&   r&   )
rU   r   rV   z
str | Noner   rt   r   rt   returnzdict[str, float])rU   r   rl   zstr | list[str] | np.ndarrayrv   zlist[Tensor])	__name__
__module____qualname____doc__r   rh   r@   rp   __classcell__)r"   s   @r#   r
   r
      s    /j #($)#'.#. $.  	.
 . . ". . !.> ikD(D7ADQTDbeD	DL
"
 0

 

r$   r
   )
__future__r   rP   loggingrK   typingr   rC   rE   r>   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr   r   )sentence_transformers.SentenceTransformerr   	getLoggerrw   r<   r
   r,   r$   r#   <module>r      sJ    " 
  	     P 6M			8	$i, ir$   