
    rh-b                        d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ erd dlmZ  ej*                  e      Z G d	 d
e      Zy)    )annotationsN)TYPE_CHECKINGCallable)Tensor)trange)SentenceEvaluator)SimilarityFunction)SentenceTransformerc                  >    e Zd ZdZddgdgg dg ddgdddd	d
d
d
d
d
d
d
df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd Z	 	 	 d	 	 	 	 	 	 	 	 	 ddZ	 	 	 d	 	 	 	 	 	 	 ddZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZddZ	d Z
ed        Zd Z xZS )InformationRetrievalEvaluatora  
    This class evaluates an Information Retrieval (IR) setting.

    Given a set of queries and a large corpus set. It will retrieve for each query the top-k most similar document. It measures
    Mean Reciprocal Rank (MRR), Recall@k, and Normalized Discounted Cumulative Gain (NDCG)

    Args:
        queries (Dict[str, str]): A dictionary mapping query IDs to queries.
        corpus (Dict[str, str]): A dictionary mapping document IDs to documents.
        relevant_docs (Dict[str, Set[str]]): A dictionary mapping query IDs to a set of relevant document IDs.
        corpus_chunk_size (int): The size of each chunk of the corpus. Defaults to 50000.
        mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
        ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
        accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
        precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
        map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
        show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
        batch_size (int): The batch size for evaluation. Defaults to 32.
        name (str): A name for the evaluation. Defaults to "".
        write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate the embeddings to. Defaults to None.
        score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to the ``similarity`` function from the ``model``.
        main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
        query_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
        query_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
        corpus_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
        corpus_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
        write_predictions (bool): Whether to write the predictions to a JSONL file. Defaults to False.
            This can be useful for downstream evaluation as it can be used as input to the :class:`~sentence_transformers.sparse_encoder.evaluation.ReciprocalRankFusionEvaluator` that accept precomputed predictions.

    Example:
        ::

            import random
            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import InformationRetrievalEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-MiniLM-L6-v2')

            # Load the Touche-2020 IR dataset (https://huggingface.co/datasets/BeIR/webis-touche2020, https://huggingface.co/datasets/BeIR/webis-touche2020-qrels)
            corpus = load_dataset("BeIR/webis-touche2020", "corpus", split="corpus")
            queries = load_dataset("BeIR/webis-touche2020", "queries", split="queries")
            relevant_docs_data = load_dataset("BeIR/webis-touche2020-qrels", split="test")

            # For this dataset, we want to concatenate the title and texts for the corpus
            corpus = corpus.map(lambda x: {'text': x['title'] + " " + x['text']}, remove_columns=['title'])

            # Shrink the corpus size heavily to only the relevant documents + 30,000 random documents
            required_corpus_ids = set(map(str, relevant_docs_data["corpus-id"]))
            required_corpus_ids |= set(random.sample(corpus["_id"], k=30_000))
            corpus = corpus.filter(lambda x: x["_id"] in required_corpus_ids)

            # Convert the datasets to dictionaries
            corpus = dict(zip(corpus["_id"], corpus["text"]))  # Our corpus (cid => document)
            queries = dict(zip(queries["_id"], queries["text"]))  # Our queries (qid => question)
            relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids])
            for qid, corpus_ids in zip(relevant_docs_data["query-id"], relevant_docs_data["corpus-id"]):
                qid = str(qid)
                corpus_ids = str(corpus_ids)
                if qid not in relevant_docs:
                    relevant_docs[qid] = set()
                relevant_docs[qid].add(corpus_ids)

            # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
            ir_evaluator = InformationRetrievalEvaluator(
                queries=queries,
                corpus=corpus,
                relevant_docs=relevant_docs,
                name="BeIR-touche2020-subset-test",
            )
            results = ir_evaluator(model)
            '''
            Information Retrieval Evaluation of the model on the BeIR-touche2020-test dataset:
            Queries: 49
            Corpus: 31923

            Score-Function: cosine
            Accuracy@1: 77.55%
            Accuracy@3: 93.88%
            Accuracy@5: 97.96%
            Accuracy@10: 100.00%
            Precision@1: 77.55%
            Precision@3: 72.11%
            Precision@5: 71.43%
            Precision@10: 62.65%
            Recall@1: 1.72%
            Recall@3: 4.78%
            Recall@5: 7.90%
            Recall@10: 13.86%
            MRR@10: 0.8580
            NDCG@10: 0.6606
            MAP@100: 0.2934
            '''
            print(ir_evaluator.primary_metric)
            # => "BeIR-touche2020-test_cosine_map@100"
            print(results[ir_evaluator.primary_metric])
            # => 0.29335196224364596
    iP  
   )         r   d   F     TNc                   t         |           g | _        |D ]4  }||v st        ||         dkD  s| j                  j	                  |       6 | j                  D cg c]  }||   	 c}| _        t        |j                               | _        | j                  D cg c]  }||   	 c}| _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _        || _        || _        || _        |r,t5        t        | j2                  j                                     ng | _        |rt9        |      nd | _        || _        |rd|z   }d|z   dz   | _        ddg| _         | jC                  | j6                         || _"        | jD                  rd|z   dz   | _#        y y c c}w c c}w )Nr   _z Information-Retrieval_evaluationz_results.csvepochstepsz_predictions.jsonl)$super__init__queries_idslenappendquerieslistkeys
corpus_idscorpusquery_promptquery_prompt_namecorpus_promptcorpus_prompt_namerelevant_docscorpus_chunk_sizemrr_at_k	ndcg_at_kaccuracy_at_kprecision_recall_at_kmap_at_kshow_progress_bar
batch_sizename	write_csvscore_functionssortedscore_function_namesr	   main_score_functiontruncate_dimcsv_filecsv_headers_append_csv_headerswrite_predictionspredictions_file)selfr   r!   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r5   r1   r4   r"   r#   r$   r%   r9   qidcid	__class__s                           /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/InformationRetrievalEvaluator.pyr   z&InformationRetrievalEvaluator.__init__}   s   0 	 	-Cm#M#,>(?!(C  '',	- 150@0@AAv{{}-.2oo>svc{>(!2*"4*!2 "*%:" !2$	".Q`F40D0D0I0I0K+L$Mfh!Na#56I#Jgk (:D?$FW#W-  !:!:;!2!!$F$MPd$dD! "G B ?s   GGc                @   |D ]  }| j                   D ]"  }| j                  j                  | d|        $ | j                  D ]B  }| j                  j                  | d|        | j                  j                  | d|        D | j                  D ]"  }| j                  j                  | d|        $ | j
                  D ]"  }| j                  j                  | d|        $ | j                  D ]"  }| j                  j                  | d|        $  y )Nz
-Accuracy@z-Precision@z-Recall@z-MRR@z-NDCG@z-MAP@)r*   r7   r   r+   r(   r)   r,   )r;   r3   
score_nameks       r?   r8   z1InformationRetrievalEvaluator._append_csv_headers   sB   . 	AJ'' F  '':,j(DEF // D  '':,k!(EF  '':,hqc(BCD ]] A  '':,eA3(?@A ^^ B  '':,fQC(@AB ]] A  '':,eA3(?@A	A    c                   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }t        j                  d	| j                   d
| d       | j                  J|j
                  |j                  i| _        |j
                  g| _        | j                  | j                          | j                  |g|d|i|}|| j                  rt        j                  j                  || j                        }	t        j                  j                  |	      sJt!        |	dd      }
|
j#                  dj                  | j$                               |
j#                  d       nt!        |	dd      }
||g}| j                  D ]  }| j&                  D ]  }|j)                  ||   d   |           | j*                  D ]6  }|j)                  ||   d   |          |j)                  ||   d   |          8 | j,                  D ]  }|j)                  ||   d   |           | j.                  D ]  }|j)                  ||   d   |           | j0                  D ]  }|j)                  ||   d   |            |
j#                  dj                  t3        t4        |                   |
j#                  d       |
j7                          | j8                  s| j:                  gt=        | j                  D cg c]"  }|||   d   t=        | j.                           f$ c}d       d   }| dt=        | j.                         | _        n3| j:                  j>                   dt=        | j.                         | _        |jA                         D ci c]Y  \  }}|jA                         D ]A  \  }}|jA                         D ])  \  }}| d|jC                  ddt5        |      z          |+ C [ }}}}}}}| jE                  || j                        }| jG                  ||||       |S c c}w c c}}}}}}w ) Nz after epoch z
 in epoch z after z stepsr   z (truncated to )z5Information Retrieval Evaluation of the model on the z dataset:output_pathwutf-8modeencoding,
a
accuracy@kprecision@krecall@kmrr@kndcg@kmap@kc                    | d   S )Nr    xs    r?   <lambda>z8InformationRetrievalEvaluator.__call__.<locals>.<lambda>  s
    !A$ rC   )keyr   z_ndcg@r   z@k@)$r5   loggerinfor/   r1   similarity_fn_name
similarityr3   r8   compute_metricesr0   ospathjoinr6   isfileopenwriter7   r*   r   r+   r(   r)   r,   mapstrcloseprimary_metricr4   maxvalueitemsreplaceprefix_name_to_metrics store_metrics_in_model_card_data)r;   modelrH   r   r   argskwargsout_txtscorescsv_pathfOutoutput_datar/   rB   score_functionvalues_dictmetric_namevaluesrn   metricss                       r?   __call__z&InformationRetrievalEvaluator.__call__   sS    B;{)%1&ugWUG6BG():):(;1==GKDII;V^_f^gghij'$)$<$<e>N>N#OD ).)A)A(BD%$$T%>%>?&&&uWW+WPVW "t~~ww||K?H77>>(+H3A

388D$4$456

4  H3A %.K11 A++ FA&&vd|L'A!'DEF 33 DA&&vd|M'B1'EF&&vd|J'?'BCD  AA&&vd|G'<Q'?@A  BA&&vd|H'=a'@AB  AA&&vd|G'<Q'?@AA" JJsxxC 567JJtJJL""''/!$UYUnUnoTdF4L23t~~3FGHo&" " *8(8s4>>?R>S&T#)-)A)A)G)G(HsSWSaSaObNc&d# 06||~
 
+'2'8'8':
 $V"LLN	
 5 a 3 3D#A, GHI5P
I
I
 
 --gtyyA--eWeUK p
s   'P6AP;c                	   ||}t        t        | j                        t        | j                        t        | j                        t        | j                        t        | j
                              }| j                  || j                  d| j                  | j                        }i }| j                  D ]'  }t        t        |            D 	cg c]  }	g  c}	||<   ) t        dt        | j                        | j                  d| j                          D ]  }
t#        |
| j                  z   t        | j                              }|8| j                  || j                  |
| d| j$                  | j&                        }n||
| }| j                  j)                         D ]  \  }} |||      }t+        j,                  |t#        |t        |d               ddd	
      \  }}|j/                         j1                         }|j/                         j1                         }t        t        |            D ]  }t3        ||   ||         D ]h  \  }}| j4                  |
|z      }t        ||   |         |k  rt7        j8                  ||   |   ||f       Kt7        j:                  ||   |   ||f       j    |D ]Y  }t        t        ||               D ]=  }t        t        ||   |               D ]  }||   |   |   \  }}||d||   |   |<     ? [ | j<                  r||D ]  }| j>                  jA                  dd| d      }tB        jD                  jG                  ||      }d}tI        ||d      5 }t        t        ||               D ]d  }| jJ                  |   }| j                  |   }||   |   }tM        |d d      }|||d}|jO                  tQ        jR                  |      dz          f 	 d d d         tT        jW                  dt        | j                                tT        jW                  dt        | j                         d       | j                  D ci c]  }|| jY                  ||          }}| jZ                  D ].  }tT        jW                  d|        | j]                  ||          0 |S c c}	w # 1 sw Y   xY wc c}w )Nquery)encode_fn_nameprompt_namepromptr   zCorpus Chunks)descdisabledocumentr   TF)dimlargestr2   )	corpus_idscorez.jsonlr   rI   rJ   rK   c                    | d   S Nr   rX   rY   s    r?   r[   z@InformationRetrievalEvaluator.compute_metrices.<locals>.<lambda>  s
    '
 rC   r\   reverse)query_idr   resultsrO   z	Queries: zCorpus: zScore-Function: )/rm   r(   r)   r*   r+   r,   embed_inputsr   r#   r"   r1   ranger   r   r!   r'   r-   minr%   r$   ro   torchtopkcputolistzipr    heapqheappushheappushpopr9   r:   rp   rc   rd   re   rg   r   r2   rh   jsondumpsr^   r_   compute_metricsr3   output_scores)r;   rs   corpus_modelcorpus_embeddingsrH   max_kquery_embeddingsqueries_result_listr/   r   corpus_start_idxcorpus_end_idxsub_corpus_embeddingsr{   pair_scorespair_scores_top_k_valuespair_scores_top_k_idx	query_itrsub_corpus_idr   r   doc_itrbase_filename	json_pathrL   ry   r   
query_textr   
predictionrw   s                                  r?   rb   z.InformationRetrievalEvaluator.compute_metrices$  sI     L""#**+
  ,,LL"..$$ - 
 !(( 	SD5:3?O;P5Q(R(R%	S !'s4;;!7!7o[_[q[qWq!
 *	h !!1D4J4J!JCPTP[P[L\]N !((,(9(9 KK 0@#- $ 7 7-- ): )% )::J>(Z% )-(<(<(B(B(D h$n,-=?TU CH**UCA,?!@aQU^cC?(*? ,D+G+G+I+P+P+R((=(A(A(C(J(J(L%!&s+;'<!= hI03-i8:RS\:]1 h,u %)OO4D}4T$U	 248CDuL!NN+>t+DY+ORWYbQcd!--.A$.G	.RUZ\eTfghhh%*	hX ( 	mD"3':4'@#AB m	$S)<T)B9)M%NO mG':4'@'KG'T$E9R[fkDl'-i8Amm	m !!k&=+ B $ 5 5 = =h!D6QWHX YGGLLmD	)$A BT%*3/B4/H+I%J B	#'#3#3I#>%)\\)%<
"5d";I"F #)6JTX"Y )1%/'.&
 

4::j#9D#@ABB BB, 	iDLL 1234hs4;;/034 UYThThiD$,,-@-FGGii -- 	-DKK*4&12vd|,	- o )SvB B* js   	S<A?S+SS	c           
         ||j                   }n#|dk(  r|j                  }n|dk(  r|j                  } |f||| j                  | j                  d| j
                  d|S )Nr   r   T)r   r   r.   r-   convert_to_tensorr5   )encodeencode_queryencode_documentr.   r-   r5   )r;   rs   	sentencesr   r   r   ru   	encode_fns           r?   r   z*InformationRetrievalEvaluator.embed_inputs  sz     !Iw&**Iz)--I	
#"44"**	
 	
 		
rC   c           	     D   | j                   D ci c]  }|d }}| j                  D ci c]  }|g  }}| j                  D ci c]  }|g  }}| j                  D ci c]  }|d }}| j                  D ci c]  }|g  }}| j                  D ci c]  }|g  }}t        t        |            D ]  }	| j                  |	   }
t        ||	   d d      }| j                  |
   }| j                   D ]"  }|d| D ]  }|d   |v s||xx   dz  cc<    " $ | j                  D ]R  }d}|d| D ]  }|d   |v s|dz  } ||   j                  ||z         ||   j                  |t        |      z         T | j                  D ]4  }t        |d|       D ]!  \  }}|d   |v s||xx   d|dz   z  z  cc<    4 6 | j                  D ]e  }|d| D cg c]  }|d   |v rdnd }}dgt        |      z  }| j                  ||      | j                  ||      z  }||   j                  |       g | j                  D ]`  }d}d}t        |d|       D ]  \  }}|d   |v s|dz  }|||dz   z  z  } |t        |t        |            z  }||   j                  |       b  |D ]"  }||xx   t        | j                        z  cc<   $ |D ]  }t        j                   ||         ||<    |D ]  }t        j                   ||         ||<    |D ]  }t        j                   ||         ||<    |D ]"  }||xx   t        | j                        z  cc<   $ |D ]  }t        j                   ||         ||<    ||||||dS c c}w c c}w c c}w c c}w c c}w c c}w c c}w )	Nr   c                    | d   S r   rX   rY   s    r?   r[   z?InformationRetrievalEvaluator.compute_metrics.<locals>.<lambda>  s
    AgJ rC   Tr   r   r   g      ?)rQ   rR   rS   rU   rT   rV   )r*   r+   r(   r)   r,   r   r   r   r2   r&   r   	enumeratecompute_dcg_at_kr   r   npmean)r;   r   rB   num_hits_at_kprecisions_at_krecall_at_kMRRndcg	AveP_at_kr   r   top_hitsquery_relevant_docsk_valhitnum_correctranktop_hitpredicted_relevancetrue_relevances
ndcg_valuesum_precisionsavg_precisions                          r?   r   z-InformationRetrievalEvaluator.compute_metrics  s   '+'9'9:!A::*.*D*DEQ1b5EE&*&@&@Aq"uAA!]]+q!t++#~~.!2..$(MM2qQU2	2 s#678 5	7I''	2H 1)<BV`deH"&"4"4X"> ++ #Ae, C;'+>>%e,1, 33 R#Ae, )C;'+>>#q()  &--kE.ABE"))+<O8P*PQR  !*8Ae+<!= ID#;'+>>E
cTAX&66
  	/[cdefk[l'PW-1DDA!K'# ' $%#,?(@"@!223FNQUQfQf#UR 
 U"":.	/  	7!"!*8Ae+<!= CID#;'+>>#q(&+*BBC !/UC@S<T1U U% ''6	7Y5	7p  	2A!DLL 11	2 ! 	=A!#);!<OA	=  	5AWW[^4KN	5  	'Aggd1g&DG	'  	(AFc$,,''F	(  	1A779Q<0IaL	1 (*#
 	
e ;EA+.2H's(   
M?
N
N	
N7
N
NNc                   |d   D ]0  }t         j                  dj                  ||d   |   dz               2 |d   D ]0  }t         j                  dj                  ||d   |   dz               2 |d   D ]0  }t         j                  dj                  ||d   |   dz               2 |d   D ]-  }t         j                  d	j                  ||d   |                / |d
   D ]-  }t         j                  dj                  ||d
   |                / |d   D ]-  }t         j                  dj                  ||d   |                / y )NrQ   zAccuracy@{}: {:.2f}%r   rR   zPrecision@{}: {:.2f}%rS   zRecall@{}: {:.2f}%rT   zMRR@{}: {:.4f}rU   zNDCG@{}: {:.4f}rV   zMAP@{}: {:.4f})r^   r_   format)r;   rw   rB   s      r?   r   z+InformationRetrievalEvaluator.output_scores  sv   % 	YAKK.55a9Ma9PSV9VWX	Y & 	[AKK/66q&:OPQ:RUX:XYZ	[ 
# 	UAKK,33Avj7I!7Ls7RST	U  	HAKK(//6'?13EFG	H ! 	JAKK)00F84DQ4GHI	J  	HAKK(//6'?13EFG	HrC   c                    d}t        t        t        |       |            D ]#  }|| |   t        j                  |dz         z  z  }% |S )Nr      )r   r   r   r   log2)
relevancesrB   dcgis       r?   r   z.InformationRetrievalEvaluator.compute_dcg_at_k$  sJ    s3z?A./ 	2A:a=2771q5>11C	2
rC   c                X    i }g d}|D ]  }t        | |      t        | |      ||<     |S )N)r5   r"   r#   r$   r%   )getattr)r;   config_dictconfig_dict_candidate_keysr\   s       r?   get_config_dictz-InformationRetrievalEvaluator.get_config_dict+  sF    &
" . 	6CtS!-#*4#5C 	6 rC   ),r   dict[str, str]r!   r   r&   zdict[str, set[str]]r'   intr(   	list[int]r)   r   r*   r   r+   r   r,   r   r-   boolr.   r   r/   rj   r0   r   r5   z
int | Noner1   z4dict[str, Callable[[Tensor, Tensor], Tensor]] | Noner4   zstr | SimilarityFunction | Noner"   
str | Noner#   r   r$   r   r%   r   r9   r   returnNone)NrE   rE   )
rs   r
   rH   r   r   r   r   r   r   dict[str, float])NNN)rs   r
   r   zTensor | NonerH   r   r   r   )rs   r
   r   zstr | list[str] | np.ndarrayr   r   r   r   r   r   r   z
np.ndarray)r   zlist[object])__name__
__module____qualname____doc__r   r8   r   rb   r   r   r   staticmethodr   r   __classcell__)r>   s   @r?   r   r      sP   cT "'!d "t#0+8"e"'#'PT?C#'(,$()-"'-BeBe Be +	Be
 Be Be Be !Be  )Be Be  Be Be Be Be !Be  N!Be" =#Be$ !%Be& &'Be( ")Be* '+Be,  -Be. 
/BeHA* #'O"O  O 	O
 O 
Oh +/"&t"t )	t
  t 
tt &*"&!
"
 0
 #	

  
 
 

4[
zH&  rC   r   )
__future__r   r   r   loggingrc   typingr   r   numpyr   r   r   tqdmr   2sentence_transformers.evaluation.SentenceEvaluatorr   *sentence_transformers.similarity_functionsr	   )sentence_transformers.SentenceTransformerr
   	getLoggerr   r^   r   rX   rC   r?   <module>r      sP    "    	 *     P IM			8	$`$5 `rC   