
    rh                        d dl mZ d dlZd dlZd dlZd dlmZ d dlZd dl	m
Z
mZ d dlmZ d dlmZ erd dlmZ  ej$                  e      Z G d d	e      Zy)
    )annotationsN)TYPE_CHECKING)average_precision_scoref1_score)BinaryClassificationEvaluator)SentenceEvaluator)CrossEncoderc                  X    e Zd ZdZddddd	 	 	 	 	 	 	 	 	 	 	 d	dZ	 d
	 	 	 	 	 	 	 	 	 ddZy)#CrossEncoderClassificationEvaluatora|
  
    Evaluate a CrossEncoder model based on the accuracy of the predicted class vs. the gold labels.
    The evaluator expects a list of sentence pairs and a list of gold labels. If the model has a single output,
    it is assumed to be a binary classification model and the evaluator will calculate accuracy, F1, precision, recall,
    and average precision. If the model has multiple outputs, the evaluator will calculate macro F1, micro F1, and
    weighted F1.

    Args:
        sentence_pairs (List[List[str]]): A list of sentence pairs with each element being a list of two strings.
        labels (List[int]): A list of integers with the gold labels for each sentence pair.
        name (str): Name of the evaluator, useful for the generated model card.
        batch_size (int): Batch size used for the evaluation. Defaults to 32.
        show_progress_bar (bool): Output a progress bar. Defaults to None, which shows the progress bar if the logging level is INFO or DEBUG.
        write_csv (bool): Write results to a CSV file. If a CSV already exists, then values are appended. Defaults to True.

    Example:
        ::

            from sentence_transformers import CrossEncoder
            from sentence_transformers.cross_encoder.evaluation import CrossEncoderClassificationEvaluator
            from datasets import load_dataset

            # Load a model
            model = CrossEncoder("cross-encoder/nli-deberta-v3-base")

            # Load a dataset with two text columns and a class label column (https://huggingface.co/datasets/sentence-transformers/all-nli)
            eval_dataset = load_dataset("sentence-transformers/all-nli", "pair-class", split="dev[-1000:]")

            # Create a list of pairs, and map the labels to the labels that the model knows
            pairs = list(zip(eval_dataset["premise"], eval_dataset["hypothesis"]))
            label_mapping = {0: 1, 1: 2, 2: 0}
            labels = [label_mapping[label] for label in eval_dataset["label"]]

            # Initialize the evaluator
            cls_evaluator = CrossEncoderClassificationEvaluator(
                sentence_pairs=pairs,
                labels=labels,
                name="all-nli-dev",
            )
            results = cls_evaluator(model)
            '''
            CrossEncoderClassificationEvaluator: Evaluating the model on all-nli-dev dataset:
            Macro F1:           89.43
            Micro F1:           89.30
            Weighted F1:        89.33
            '''
            print(cls_evaluator.primary_metric)
            # => all-nli-dev_f1_macro
            print(results[cls_evaluator.primary_metric])
            # => 0.8942858180262628
         NT)name
batch_sizeshow_progress_bar	write_csvc               V   t        |      t        |      k7  rt        d      || _        t        j                  |      | _        || _        || _        |4t        j                         t        j                  t        j                  fv }|| _        d|rd|z   ndz   dz   | _        || _        y )Nz3sentence_pairs and labels must have the same lengthr   _r   z_results.csv)len
ValueErrorsentence_pairsnpasarraylabelsr   r   loggergetEffectiveLevelloggingINFODEBUGr   csv_filer   )selfr   r   r   r   r   r   kwargss           /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/cross_encoder/evaluation/classification.py__init__z,CrossEncoderClassificationEvaluator.__init__I   s     ~#f+-RSS,jj(	$$ & 8 8 :w||W]]>[ [!2=ttY[\_mm"    c                   |dk7  r|dk(  rd| }nd| d| d}nd}t         j                  d| j                   d| d	       |j                  | j                  d
| j
                        }|j                  dk(  rt        j                  || j                  d
      \  }}t        j                  || j                  d
      \  }	}
}}t        | j                  |      }t         j                  d|dz  dd|dd       t         j                  d|	dz  dd|dd       t         j                  d|
dz  d       t         j                  d|dz  d       t         j                  d|dz  d       |||	||
||d}g d| _        d| _        nt        j                  |d      }t!        | j                  |d      }t!        | j                  |d      }t!        | j                  |d      }t         j                  d|dz  d       t         j                  d |dz  d       t         j                  d!|dz  d       |||d"}g d#| _        d$| _        || j"                  rt$        j&                  j)                  || j*                        }t$        j&                  j-                  |      }t/        ||rd%nd&d'(      5 }t1        j2                  |      }|s|j5                  | j                         |j5                  ||g|j7                                d d d        | j9                  || j                        }| j;                  ||||       |S # 1 sw Y   ;xY w))Nz after epoch z
 in epoch z after z stepsr   z=CrossEncoderClassificationEvaluator: Evaluating the model on z dataset:T)convert_to_numpyr      zAccuracy:          d   z.2fz	(Threshold: z.4f)zF1:                zPrecision:         zRecall:            zAverage Precision: )accuracyaccuracy_thresholdf1f1_threshold	precisionrecallaverage_precision)	epochstepsAccuracyAccuracy_ThresholdF1F1_Threshold	PrecisionRecallAverage_Precisionr2   )axismacro)averagemicroweightedzMacro F1:           zMicro F1:           zWeighted F1:        )f1_macrof1_microf1_weighted)r3   r4   Macro_F1Micro_F1Weighted_F1rA   awzutf-8)modeencoding)r   infor   predictr   r   
num_labelsr   find_best_acc_and_thresholdr   find_best_f1_and_thresholdr   csv_headersprimary_metricr   argmaxr   r   ospathjoinr   isfileopencsvwriterwriterowvaluesprefix_name_to_metrics store_metrics_in_model_card_data)r    modeloutput_pathr3   r4   out_txtpred_scoresaccacc_thresholdr.   r0   r1   r/   apmetricspred_labelsrA   rB   rC   csv_pathoutput_file_existsfrY   s                          r"   __call__z,CrossEncoderClassificationEvaluator.__call__b   sP    B;{)%1&ugWUG6BGSTXT]T]S^^fgnfoopqrmm$$J`J` $ 
 q !>!Z!ZT[[$"C 3P2j2jT[[$3/B	6< )kBBKK-cCi_N=Y\J]]^_`KK-b3hs^>,WZI[[\]^KK-i#oc-BCDKK-fsl3-?@AKK-b3hs^<=  &3 ,& %'G
 D #6D))Ka8K['JH['JH"4;;ZPKKK.x#~c.BCDKK.x#~c.BCDKK.{S/@.EFG %$*G
  YD",D"t~~ww||K?H!#!9h,>SCRYZ C^_A)OOD$4$45 A0@ ABC --gtyyA--eWeUKC Cs   >AMM)r   zlist[list[str]]r   z	list[int]r   strr   intr   zbool | Noner   bool)Nr&   r&   )
r^   r	   r_   z
str | Noner3   rl   r4   rl   returnzdict[str, float])__name__
__module____qualname____doc__r#   rj    r$   r"   r   r      s    2r )-#'# #
 # # '# #4 bdR!R0:RJMR[^R	Rr$   r   )
__future__r   rX   r   rS   typingr   numpyr   sklearn.metricsr   r    sentence_transformers.evaluationr   2sentence_transformers.evaluation.SentenceEvaluatorr   0sentence_transformers.cross_encoder.CrossEncoderr	   	getLoggerro   r   r   rs   r$   r"   <module>r|      sG    " 
  	    = J PM			8	$`*; `r$   