
from __future__ import annotations

import csv
import logging
import os
from typing import TYPE_CHECKING, Callable

import numpy as np
import torch
import tqdm
from sklearn.metrics import average_precision_score, ndcg_score

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.util import cos_sim

if TYPE_CHECKING:
    from torch import Tensor

    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class RerankingEvaluator(SentenceEvaluator):
    """
    This class evaluates a SentenceTransformer model for the task of re-ranking.

    Given a query and a list of documents, it computes the score [query, doc_i] for all possible
    documents and sorts them in decreasing order. Then, MRR@10, NDCG@10, and MAP are computed to measure the quality of the ranking.
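
    For example, if the first relevant document for a query lands at rank 3 of the
    re-ranked list, that query contributes 1/3 to MRR@10, and 0 if no relevant
    document appears in the top 10. NDCG@10 additionally rewards ranking *all*
    relevant documents early, while MAP averages the precision obtained at the
    position of each relevant document.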

    Args:
        samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:

            - 'query': The search query.
            - 'positive': A list of positive (relevant) documents.
            - 'negative': A list of negative (irrelevant) documents.
        at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import RerankingEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer("all-MiniLM-L6-v2")

            # Load a dataset with queries, positives, and negatives
            eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation")

            samples = [
                {
                    "query": sample["query"],
                    "positive": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if is_selected],
                    "negative": [text for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"]) if not is_selected],
                }
                for sample in eval_dataset
            ]

            # Initialize the evaluator
            reranking_evaluator = RerankingEvaluator(
                samples=samples,
                name="ms-marco-dev",
            )
            results = reranking_evaluator(model)
            '''
            RerankingEvaluator: Evaluating the model on the ms-marco-dev dataset:
            Queries: 9706      Positives: Min 1.0, Mean 1.1, Max 5.0   Negatives: Min 1.0, Mean 7.1, Max 9.0
            MAP: 56.07
            MRR@10: 56.70
            NDCG@10: 67.08
            '''
            print(reranking_evaluator.primary_metric)
            # => ms-marco-dev_ndcg@10
            print(results[reranking_evaluator.primary_metric])
            # => 0.6708042171399308
    
    T@   FNc                   t         |           || _        || _        |
!t        j                  d|
 d       |
| _        n|| _        || _        || _        || _	        || _
        |	| _        t        | j                  t              r(t        | j                  j                               | _        | j                  D cg c](  }t!        |d         dkD  st!        |d         dkD  s'|* c}| _        d|rd|z   ndz   d	| j                   d
z   | _        dddd| j                   d| j                   g| _        || _        d| j                   | _        y c c}w )Nz?The `mrr_at_k` parameter has been deprecated; please use `at_k=z
` instead.positiver   negativer   _r   z
_results_@z.csvepochstepsMAPMRR@NDCG@ndcg@)super__init__samplesnameloggerwarningat_ksimilarity_fct
batch_sizeshow_progress_baruse_batched_encodingtruncate_dim
isinstancedictlistvalueslencsv_filecsv_headers	write_csvprimary_metric)selfr   r!   r   r.   r"   r#   r$   r%   r&   mrr_at_ksample	__class__s               /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/RerankingEvaluator.pyr   zRerankingEvaluator.__init__Y   sZ    		NN\]e\ffpqr DIDI,$!2$8!(dllD) 3 3 56DL "&
VJ5G1H11LQTU[\fUgQhklQlF
 -dd
KPZ[_[d[dZeeiNjj499+DII;
 # %dii[1
s   ;EE$Ec                (   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    d	z  }t        j                  d
| j                   d| d       | j	                  |      }|d   }|d   }|d   }	| j
                  D 
cg c]  }
t        |
d          }}
| j
                  D 
cg c]  }
t        |
d          }}
t        j                  dt        | j
                         dt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      d       t        j                  d|dz  d       t        j                  d| j                   d|dz  d       t        j                  d| j                   d|	dz  d       || j                  rt        j                  j                  || j                         }t        j                  j#                  |      }t%        |d|rdndd !      5 }t'        j(                  |      }|s|j+                  | j,                         |j+                  |||||	g       ddd       d|d"| j                   |d#| j                   |	i}| j/                  || j                        }| j1                  ||||       |S c c}
w c c}
w # 1 sw Y   gxY w)$a  
        Evaluates the model on the dataset and returns the evaluation metrics.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to evaluate.
            output_path (str, optional): The output path to write the results. Defaults to None.
            epoch (int, optional): The current epoch number. Defaults to -1.
            steps (int, optional): The current step number. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"RerankingEvaluator: Evaluating the model on the {self.name} dataset{out_txt}:")

        scores = self.compute_metrices(model)
        mean_ap = scores["map"]
        mean_mrr = scores["mrr"]
        mean_ndcg = scores["ndcg"]

        # Some statistics about the dataset
        num_positives = [len(sample["positive"]) for sample in self.samples]
        num_negatives = [len(sample["negative"]) for sample in self.samples]

        logger.info(
            f"Queries: {len(self.samples)} \t Positives: Min {np.min(num_positives):.1f}, "
            f"Mean {np.mean(num_positives):.1f}, Max {np.max(num_positives):.1f} \t "
            f"Negatives: Min {np.min(num_negatives):.1f}, Mean {np.mean(num_negatives):.1f}, "
            f"Max {np.max(num_negatives):.1f}"
        )
        logger.info(f"MAP: {mean_ap * 100:.2f}")
        logger.info(f"MRR@{self.at_k}: {mean_mrr * 100:.2f}")
        logger.info(f"NDCG@{self.at_k}: {mean_ndcg * 100:.2f}")

        # Write results to disk
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            output_file_exists = os.path.isfile(csv_path)
            with open(csv_path, newline="", mode="a" if output_file_exists else "w", encoding="utf-8") as f:
                writer = csv.writer(f)
                if not output_file_exists:
                    writer.writerow(self.csv_headers)
                writer.writerow([epoch, steps, mean_ap, mean_mrr, mean_ndcg])

        metrics = {
            "map": mean_ap,
            f"mrr@{self.at_k}": mean_mrr,
            f"ndcg@{self.at_k}": mean_ndcg,
        }
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics, epoch, steps)
        return metrics

    def compute_metrices(self, model: SentenceTransformer) -> dict[str, float]:
        """
        Computes the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        return (
            self.compute_metrices_batched(model)
            if self.use_batched_encoding
            else self.compute_metrices_individual(model)
        )

    def compute_metrices_batched(self, model: SentenceTransformer) -> dict[str, float]:
        """
        Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        # Encode all queries in one pass
        all_query_embs = self.embed_inputs(
            model,
            [sample["query"] for sample in self.samples],
            encode_fn_name="query",
            show_progress_bar=self.show_progress_bar,
        )

        # Encode all positive and negative documents in one pass
        all_docs = []
        for sample in self.samples:
            all_docs.extend(sample["positive"])
            all_docs.extend(sample["negative"])

        all_docs_embs = self.embed_inputs(
            model, all_docs, encode_fn_name="document", show_progress_bar=self.show_progress_bar
        )

        # Compute the scores per query
        query_idx, docs_idx = 0, 0
        for instance in self.samples:
            query_emb = all_query_embs[query_idx]
            query_idx += 1

            num_pos = len(instance["positive"])
            num_neg = len(instance["negative"])
            docs_emb = all_docs_embs[docs_idx : docs_idx + num_pos + num_neg]
            docs_idx += num_pos + num_neg

            if num_pos == 0 or num_neg == 0:
                continue

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            is_relevant = [1] * num_pos + [0] * num_neg

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and MAP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def compute_metrices_individual(self, model: SentenceTransformer) -> dict[str, float]:
        """
        Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        """
        all_mrr_scores = []
        all_ndcg_scores = []
        all_ap_scores = []

        for instance in tqdm.tqdm(self.samples, disable=not self.show_progress_bar, desc="Samples"):
            query = instance["query"]
            positive = list(instance["positive"])
            negative = list(instance["negative"])

            if len(positive) == 0 or len(negative) == 0:
                continue

            docs = positive + negative
            is_relevant = [1] * len(positive) + [0] * len(negative)

            query_emb = self.embed_inputs(model, [query], encode_fn_name="query", show_progress_bar=False)
            docs_emb = self.embed_inputs(model, docs, encode_fn_name="document", show_progress_bar=False)

            pred_scores = self.similarity_fct(query_emb, docs_emb)
            if len(pred_scores.shape) > 1:
                pred_scores = pred_scores[0]

            pred_scores_argsort = torch.argsort(-pred_scores)  # Sort in decreasing order
            pred_scores = pred_scores.cpu().tolist()

            # MRR@k: reciprocal rank of the first relevant document within the top k
            mrr_score = 0
            for rank, index in enumerate(pred_scores_argsort[0 : self.at_k]):
                if is_relevant[index]:
                    mrr_score = 1 / (rank + 1)
                    break
            all_mrr_scores.append(mrr_score)

            # NDCG@k and MAP over the full candidate list
            all_ndcg_scores.append(ndcg_score([is_relevant], [pred_scores], k=self.at_k))
            all_ap_scores.append(average_precision_score(is_relevant, pred_scores))

        mean_ap = np.mean(all_ap_scores)
        mean_mrr = np.mean(all_mrr_scores)
        mean_ndcg = np.mean(all_ndcg_scores)

        return {"map": mean_ap, "mrr": mean_mrr, "ndcg": mean_ndcg}

    def embed_inputs(
        self,
        model: SentenceTransformer,
        sentences: str | list[str] | np.ndarray,
        encode_fn_name: str | None = None,
        show_progress_bar: bool | None = None,
        **kwargs,
    ) -> Tensor:
        # Route to the query/document-specific encode method when requested
        if encode_fn_name is None:
            encode_fn = model.encode
        elif encode_fn_name == "query":
            encode_fn = model.encode_query
        elif encode_fn_name == "document":
            encode_fn = model.encode_document
        return encode_fn(
            sentences,
            batch_size=self.batch_size,
            show_progress_bar=show_progress_bar,
            convert_to_tensor=True,
            truncate_dim=self.truncate_dim,
            **kwargs,
        )

    def get_config_dict(self):
        config_dict = {"at_k": self.at_k}
        if self.truncate_dim is not None:
            config_dict["truncate_dim"] = self.truncate_dim
        return config_dict