
    rh-                        d dl mZ d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZ er
d dlZd dlmZ  ej.                  e      Z G d	 d
e	      Zy)    )annotationsN)TYPE_CHECKINGLiteral)SentenceEvaluator)InputExample)SimilarityFunction)pairwise_cos_simpairwise_dot_scorepairwise_euclidean_simpairwise_manhattan_sim)SentenceTransformerc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd Zed
d       Z	 d	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 ddZ	d Z
 xZS )TripletEvaluatora  
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example).
    Checks if ``similarity(sentence, positive_example) > similarity(sentence, negative_example) + margin``.

    Args:
        anchors (List[str]): Sentences to check similarity to. (e.g. a query)
        positives (List[str]): List of positive sentences
        negatives (List[str]): List of negative sentences
        main_similarity_function (Union[str, SimilarityFunction], optional):
            The similarity function to use. If not specified, use cosine similarity,
            dot product, Euclidean, and Manhattan similarity. Defaults to None.
        margin (Union[float, Dict[str, float]], optional): Margins for various similarity metrics.
            If a float is provided, it will be used as the margin for all similarity metrics.
            If a dictionary is provided, the keys should be 'cosine', 'dot', 'manhattan', and 'euclidean'.
            The value specifies the minimum margin by which the negative sample should be further from
            the anchor than the positive sample. Defaults to None.
        name (str): Name for the output. Defaults to "".
        batch_size (int): Batch size used to compute embeddings. Defaults to 16.
        show_progress_bar (bool): If true, prints a progress bar. Defaults to False.
        write_csv (bool): Write results to a CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to.
            `None` uses the model's current truncation dimension. Defaults to None.
        similarity_fn_names (List[str], optional): List of similarity function names to evaluate.
            If not specified, evaluate using the ``model.similarity_fn_name``.
            Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TripletEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with (anchor, positive, negative) triplets
            dataset = load_dataset("sentence-transformers/all-nli", "triplet", split="dev")

            # Initialize the TripletEvaluator using anchors, positives, and negatives
            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                name="all_nli_dev",
            )
            results = triplet_evaluator(model)
            '''
            TripletEvaluator: Evaluating the model on the all-nli-dev dataset:
            Accuracy Cosine Similarity:        95.60%
            '''
            print(triplet_evaluator.primary_metric)
            # => "all_nli_dev_cosine_accuracy"
            print(results[triplet_evaluator.primary_metric])
            # => 0.956
    c                   t         |           || _        || _        || _        || _        |
| _        t        | j                        t        | j                        k(  sJ t        | j                        t        | j                        k(  sJ |dk7  r||}t        j                  d       |rt        |      nd | _        |xs g | _        |ddddd| _        nOt        |t        t         f      r||||d| _        n,t        |t"              ri ddddd|| _        nt%        d      || _        |Lt        j)                         t*        j,                  k(  xs% t        j)                         t*        j.                  k(  }|| _        d|rd|z   ndz   d	z   | _        d
dg| _        |	| _        | j9                  | j                         y )N
deprecatedzThe 'main_distance_function' parameter is deprecated. Please use 'main_similarity_function' instead. 'main_distance_function' will be removed in a future release.r   cosinedot	manhattan	euclideanzb`margin` should be a float or a dictionary with keys 'cosine', 'dot', 'manhattan', and 'euclidean'triplet_evaluation_ z_results.csvepochsteps)super__init__anchors	positives	negativesnametruncate_dimlenloggerwarningr   main_similarity_functionsimilarity_fn_namesmargin
isinstancefloatintdict
ValueError
batch_sizegetEffectiveLevelloggingINFODEBUGshow_progress_barcsv_filecsv_headers	write_csv_append_csv_headers)selfr   r   r    r&   r(   r!   r.   r3   r6   r"   r'   main_distance_function	__class__s                /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/TripletEvaluator.pyr   zTripletEvaluator.__init__T   s    	""	(4<< C$77774<< C$7777!\16N6V'=$NNP =U78Z^ 	% $7#<" >%&qqqQDK-%+F^deDK%qqqIDK
 t  %$((*gll:if>V>V>X\c\i\i>i  "314S4ZRPSaa#W-"  !9!9:    c                N    |D ]   }| j                   j                  d|        " y )N	accuracy_)r5   append)r8   r'   fn_names      r;   r7   z$TripletEvaluator._append_csv_headers   s,    * 	;G##iy$9:	;r<   c                    g }g }g }|D ]\  }|j                  |j                  d          |j                  |j                  d          |j                  |j                  d          ^  | |||fi |S )Nr         )r?   texts)clsexampleskwargsr   r   r    examples          r;   from_input_examplesz$TripletEvaluator.from_input_examples   sy    		 	/GNN7==+,W]]1-.W]]1-.	/ 7Iy;F;;r<   c                   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }t        j                  d	| j                   d
| d       | j	                  || j
                        }| j	                  || j                        }| j	                  || j                        }| j                  s-|j                  g| _        | j                  | j                         d d d d d}	i }
| j                  D ]  }||	v s |	|   |||      \  }}||| j                  |   z   kD  j                         j                         j                         }||
| d<   t        j                  d|j                          d|d        |&| j                   rt"        j$                  j'                  || j(                        }t"        j$                  j+                  |      svt-        |ddd      5 }t/        j0                  |      }|j3                  | j4                         |j3                  ||gt7        |
j9                               z          d d d        nZt-        |ddd      5 }t/        j0                  |      }|j3                  ||gt7        |
j9                               z          d d d        t;        | j                        dkD  rt=        |
j9                               |
d<   | j>                  rat@        jB                  dt@        jD                  dt@        jF                  dt@        jH                  dijK                  | j>                        | _&        n7t;        | j                        dkD  rd| _&        n| j                  d    d| _&        | jO                  |
| j                        }
| jQ                  ||
||       |
S # 1 sw Y   xY w# 1 sw Y   !xY w) Nz after epoch z
 in epoch z after z stepsr   z (truncated to )z.TripletEvaluator: Evaluating the model on the z dataset:c                2    t        | |      t        | |      fS N)r	   r   r   r    s      r;   <lambda>z+TripletEvaluator.__call__.<locals>.<lambda>   s     )4 )4= r<   c                2    t        | |      t        | |      fS rO   )r
   rP   s      r;   rQ   z+TripletEvaluator.__call__.<locals>.<lambda>   s    "7I6"7I6: r<   c                2    t        | |      t        | |      fS rO   )r   rP   s      r;   rQ   z+TripletEvaluator.__call__.<locals>.<lambda>       &w	:&w	:@ r<   c                2    t        | |      t        | |      fS rO   )r   rP   s      r;   rQ   z+TripletEvaluator.__call__.<locals>.<lambda>   rT   r<   r   	_accuracyz	Accuracy z Similarity:	z.2%wzutf-8)newlinemodeencodingarB   max_accuracycosine_accuracydot_accuracyeuclidean_accuracymanhattan_accuracyr   ))r"   r$   infor!   embed_inputsr   r   r    r'   similarity_fn_namer7   r(   r*   meanitem
capitalizer6   ospathjoinr4   isfileopencsvwriterwriterowr5   listvaluesr#   maxr&   r   COSINEDOT_PRODUCT	EUCLIDEAN	MANHATTANgetprimary_metricprefix_name_to_metrics store_metrics_in_model_card_data)r8   modeloutput_pathr   r   out_txtembeddings_anchorsembeddings_positivesembeddings_negativessimilarity_functionsmetricsr@   positive_scoresnegative_scoresaccuracycsv_pathfrm   s                     r;   __call__zTripletEvaluator.__call__   s    B;{)%1&ugWUG6BG():):(;1==GDTYYKxX_W``abc!..udllC#00G#00G''(-(@(@'AD$$$T%=%=> 
& // 	\G..3P3G3P&(<>R40 ,oG@T.TT[[]bbdiik197)9-.i(:(:(<'=^HUX>Z[	\ "t~~ww||K?H77>>(+(BS7K Mq ZZ]FOOD$4$45OOUENT'..:J5K$KLM M (BS7K Mq ZZ]FOOUENT'..:J5K$KLM t''(1,&)'..*:&;GN#(("))+<"..",,.B",,.B	#
 c$//0  4++,q0&4#)-)A)A!)D(EY&O#--gtyyA--eWeUK9M MM Ms   ;AO1AOOOc                n     |j                   |f| j                  | j                  d| j                  d|S )NT)r.   r3   convert_to_numpyr"   )encoder.   r3   r"   )r8   rz   	sentencesrG   s       r;   rb   zTripletEvaluator.embed_inputs   sE     u||
"44!**
 
 	
r<   c                    i }| j                   dddddk7  r| j                   |d<   | j                  | j                  |d<   |S )Nr   r   r(   r"   )r(   r"   )r8   config_dicts     r;   get_config_dictz TripletEvaluator.get_config_dict  sL    ;;QqqqQQ$(KKK!(*.*;*;K'r<   )	NNr      FTNNr   )r   	list[str]r   r   r    r   r&   str | SimilarityFunction | Noner(   zfloat | dict[str, float] | Noner!   strr.   r+   r3   boolr6   r   r"   z
int | Noner'   z?list[Literal['cosine', 'dot', 'euclidean', 'manhattan']] | Noner9   r   )rF   zlist[InputExample])NrK   rK   )
rz   r   r{   z
str | Noner   r+   r   r+   returnzdict[str, float])rz   r   r   zstr | list[str] | np.ndarrayr   z
np.ndarray)__name__
__module____qualname____doc__r   r7   classmethodrI   r   rb   r   __classcell__)r:   s   @r;   r   r      s!   7| EI26"'#'_cBN>;>; >; 	>;
 #B>; 0>; >; >;  >; >; !>; ]>; !@>;@; 	< 	< ikS(S7ASQTSbeS	Sj
"
 0

 

r<   r   )
__future__r   rl   r0   rg   typingr   r   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.readersr   *sentence_transformers.similarity_functionsr   sentence_transformers.utilr	   r
   r   r   numpynp)sentence_transformers.SentenceTransformerr   	getLoggerr   r$   r    r<   r;   <module>r      sS    " 
  	 ) P 6 I  M			8	$t( tr<   