
    rh1                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ erd dlmZ  ej                  e      Z G d d	e
      Zy)
    )annotationsN)defaultdict)TYPE_CHECKING)SentenceEvaluator)paraphrase_mining)SentenceTransformerc                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 	 	 	 	 d	dZed        Zd Z xZ	S )
ParaphraseMiningEvaluatora  
    Given a large set of sentences, this evaluator performs paraphrase (duplicate) mining and
    identifies the pairs with the highest similarity. It compare the extracted paraphrase pairs
    with a set of gold labels and computes the F1 score.

    Args:
        sentences_map (Dict[str, str]): A dictionary that maps sentence-ids to sentences.
            For example, sentences_map[id] => sentence.
        duplicates_list (List[Tuple[str, str]], optional): A list with id pairs [(id1, id2), (id1, id5)]
            that identifies the duplicates / paraphrases in the sentences_map. Defaults to None.
        duplicates_dict (Dict[str, Dict[str, bool]], optional): A default dictionary mapping [id1][id2]
            to true if id1 and id2 are duplicates. Must be symmetric, i.e., if [id1][id2] => True,
            then [id2][id1] => True. Defaults to None.
        add_transitive_closure (bool, optional): If true, it adds a transitive closure,
            i.e. if dup[a][b] and dup[b][c], then dup[a][c]. Defaults to False.
        query_chunk_size (int, optional): To identify the paraphrases, the cosine-similarity between
            all sentence-pairs will be computed. As this might require a lot of memory, we perform
            a batched computation. query_chunk_size sentences will be compared against up to
            corpus_chunk_size sentences. In the default setting, 5000 sentences will be grouped
            together and compared up-to against 100k other sentences. Defaults to 5000.
        corpus_chunk_size (int, optional): The corpus will be batched, to reduce the memory requirement.
            Defaults to 100000.
        max_pairs (int, optional): We will only extract up to max_pairs potential paraphrase candidates.
            Defaults to 500000.
        top_k (int, optional): For each query, we extract the top_k most similar pairs and add it to a sorted list.
            I.e., for one sentence we cannot find more than top_k paraphrases. Defaults to 100.
        show_progress_bar (bool, optional): Output a progress bar. Defaults to False.
        batch_size (int, optional): Batch size for computing sentence embeddings. Defaults to 16.
        name (str, optional): Name of the experiment. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to.
            `None` uses the model's current truncation dimension. Defaults to None.

    Example:
        ::

            from datasets import load_dataset
            from sentence_transformers.SentenceTransformer import SentenceTransformer
            from sentence_transformers.evaluation import ParaphraseMiningEvaluator

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load the Quora Duplicates Mining dataset
            questions_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "questions", split="dev")
            duplicates_dataset = load_dataset("sentence-transformers/quora-duplicates-mining", "duplicates", split="dev")

            # Create a mapping from qid to question & a list of duplicates (qid1, qid2)
            qid_to_questions = dict(zip(questions_dataset["qid"], questions_dataset["question"]))
            duplicates = list(zip(duplicates_dataset["qid1"], duplicates_dataset["qid2"]))

            # Initialize the paraphrase mining evaluator
            paraphrase_mining_evaluator = ParaphraseMiningEvaluator(
                sentences_map=qid_to_questions,
                duplicates_list=duplicates,
                name="quora-duplicates-dev",
            )
            results = paraphrase_mining_evaluator(model)
            '''
            Paraphrase Mining Evaluation of the model on the quora-duplicates-dev dataset:
            Number of candidate pairs: 250564
            Average Precision: 56.51
            Optimal threshold: 0.8325
            Precision: 52.76
            Recall: 59.19
            F1: 55.79
            '''
            print(paraphrase_mining_evaluator.primary_metric)
            # => "quora-duplicates-dev_average_precision"
            print(results[paraphrase_mining_evaluator.primary_metric])
            # => 0.5650940787776353
    c           
        t         |           g | _        g | _        |j	                         D ];  \  }}| j                  j                  |       | j                  j                  |       = || _        |	| _        |
| _        || _	        || _
        || _        || _        || _        ||nt        d       | _        |8|D ]3  \  }}||v s||v sd| j                  |   |<   d| j                  |   |<   5 |r | j!                  | j                        | _        t#               }| j                  D ]j  }| j                  |   D ]V  }||v s||v s| j                  |   |   s| j                  |   |   s2|j%                  t'        t)        ||g                   X l t+        |      | _        |rd|z   }d|z   dz   | _        g d| _        || _        d| _        y )Nc                      t        t              S )N)r   bool     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/ParaphraseMiningEvaluator.py<lambda>z4ParaphraseMiningEvaluator.__init__.<locals>.<lambda>}   s    bmnrbs r   T_paraphrase_mining_evaluationz_results.csv)epochsteps	precisionrecallf1	thresholdaverage_precisionr   )super__init__	sentencesidsitemsappendnameshow_progress_bar
batch_sizequery_chunk_sizecorpus_chunk_size	max_pairstop_ktruncate_dimr   
duplicatesadd_transitive_closuresetaddtuplesortedlentotal_num_duplicatescsv_filecsv_headers	write_csvprimary_metric)selfsentences_mapduplicates_listduplicates_dictr*   r$   r%   r&   r'   r"   r#   r!   r3   r(   idsentenceid1id2positive_key_pairskey1key2	__class__s                        r   r   z"ParaphraseMiningEvaluator.__init__\   s     	)//1 	 LBNN!!(+HHOOB	  	!2$ 0!2"
(-<-H/kZsNt&+ 5S-'C=,@04DOOC(-04DOOC(-5 ""99$//JDO UOO 	HD- HM)-.t48Md8S&**5t1E+FGH	H %((:$;!:D;dB^Sl"1r   c                j   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }t        j                  d	| j                   d
| d       t	        || j
                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   	      }t        j                  dt        t        |            z          dx}}d}	dx}
x}}d}t        t        |            D ]  }||   \  }}}| j                  |   }| j                  |   }|dz  }| j                   |   |   s| j                   |   |   sT|dz  }||z  }|| j"                  z  }d|z  |z  ||z   z  }||z  }||
kD  s|}
|}|}||   d   |t%        |dz   t        |      dz
           d   z   dz  }	 || j"                  z  }t        j                  d|dz  d       t        j                  d|	d       t        j                  d|dz  d       t        j                  d|dz  d       t        j                  d|
dz  dd       || j&                  rt(        j*                  j-                  || j.                        }t(        j*                  j1                  |      sat3        |ddd      5 }t5        j6                  |      }|j9                  | j:                         |j9                  |||||
|	|g       d d d        nEt3        |ddd      5 }t5        j6                  |      }|j9                  |||||
|	|g       d d d        ||
|||	d}| j=                  || j                        }| j?                  ||||       |S # 1 sw Y   CxY w# 1 sw Y   OxY w)Nz after epoch z
 in epoch z after z steps z (truncated to )z1Paraphrase Mining Evaluation of the model on the z dataset:)r"   r#   r$   r%   r&   r'   r(   zNumber of candidate pairs: r         zAverage Precision: d   z.2fzOptimal threshold: z.4fzPrecision: zRecall: zF1: 
wzutf-8)newlinemodeencodinga)r   r   r   r   r   ) r(   loggerinfor!   r   r   r"   r#   r$   r%   r&   r'   strr/   ranger   r)   r0   minr3   ospathjoinr1   isfileopencsvwriterwriterowr2   prefix_name_to_metrics store_metrics_in_model_card_data)r5   modeloutput_pathr   r   out_txt
pairs_list	n_extract	n_correctr   best_f1best_recallbest_precisionr   idxscoreijr;   r<   r   r   r   csv_pathfrZ   metricss                              r   __call__z"ParaphraseMiningEvaluator.__call__   s    B;{)%1&ugWUG6BG():):(;1==GG		{RZ[bZccdef 'NN"44!22"44nn****


 	1CJ4HHI !"!	I	1222+Z) 	lC$S/KE1a((1+C((1+C NIs#C(DOOC,@,EQ	%	1	"T%>%>>]V+y6/AB!Y.!< G%.N"(K!+C!3jS1WcR\o`aNaAb6cde6f!fjk kI#	l& .0I0II)*;c*A#)FGH))C9:k.3"6s!;<=h{S0567d7S=-R01"t~~ww||K?H77>>(+(BS7K xq ZZ]FOOD$4$45OOUE>;PWYbdu$vwx x
 (BS7K xq ZZ]FOOUE>;PWYbdu$vwx
 "3'!"
 --gtyyA--eWeUK%x x
x xs   A	N-.N)N&)N2c                t   t               }t        | j                               D ]  }||vs	t               }|j                  |       t        | |         }t	        |      dkD  rI|j                  d      }||vr%|j                  |       |j                  | |          t	        |      dkD  rIt        |      }t        t	        |      dz
        D ]c  }t        |dz   t	        |            D ]F  }d| ||      ||   <   d| ||      ||   <   |j                  ||          |j                  ||          H e  | S )Nr   rF   T)r+   listkeysr,   r/   popextendrR   )graphnodes_visitedrN   connected_subgraph_nodesneighbor_nodes_queuenoderi   rj   s           r   r*   z0ParaphraseMiningEvaluator.add_transitive_closure   sb   ejjl# 	GA%+.5((,,Q/ (,E!H~$./!3/33A6D#;;044T:,33E$K@	 ./!3 ,00H+I(s#;<q@A GA"1q5#.F*GH GZ^6q9:;STU;VWZ^6q9:;STU;VW%))*B1*EF%))*B1*EFGG	G, r   c                    | j                   | j                  | j                  d}| j                  | j                  |d<   |S )N)r*   r&   r'   r(   )r*   r&   r'   r(   )r5   config_dicts     r   get_config_dictz)ParaphraseMiningEvaluator.get_config_dict  sD    &*&A&AZZ

 (*.*;*;K'r   )NNFi  i i  rH   F   rC   TN)r6   zdict[str, str]r7   zlist[tuple[str, str]] | Noner8   z!dict[str, dict[str, bool]] | Noner*   r   r$   intr%   r}   r&   r}   r'   r}   r"   r   r#   r}   r!   rQ   r3   r   r(   z
int | None)NrB   rB   )
r^   r   r_   z
str | Noner   r}   r   r}   returnzdict[str, float])
__name__
__module____qualname____doc__r   rn   staticmethodr*   r{   __classcell__)r@   s   @r   r
   r
      s
   GX 9==A', $!'"'#'>2%>2 6>2 ;	>2
 !%>2 >2 >2 >2 >2  >2 >2 >2 >2 !>2B ikU(U7AUQTUbeU	Un  4r   r
   )
__future__r   rY   loggingrT   collectionsr   typingr   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr   )sentence_transformers.SentenceTransformerr   	getLoggerr   rO   r
   r   r   r   <module>r      sD    " 
  	 #   P 8M			8	$D 1 Dr   