
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any

from huggingface_hub import ModelCard

from sentence_transformers.model_card import SentenceTransformerModelCardCallback, SentenceTransformerModelCardData
from sentence_transformers.util import is_datasets_available

if is_datasets_available():
    from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, Value

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from sentence_transformers.cross_encoder.CrossEncoder import CrossEncoder


class CrossEncoderModelCardCallback(SentenceTransformerModelCardCallback):
    def __init__(self, default_args_dict: dict[str, Any]) -> None:
        super().__init__(default_args_dict)


@dataclass
class CrossEncoderModelCardData(SentenceTransformerModelCardData):
    """A dataclass storing data used in the model card.

    Args:
        language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
            e.g. "en" or ["en", "de", "nl"]
        license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
            or "cc-by-nc-sa-4.0"
        model_name (`Optional[str]`): The pretty name of the model, e.g. "CrossEncoder based on answerdotai/ModernBERT-base".
        model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
            e.g. "tomaarsen/ce-mpnet-base-ms-marco".
        train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
        eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
        task_name (`str`): The human-readable task the model is trained on,
            e.g. "semantic search and paraphrase mining".
        tags (`Optional[List[str]]`): A list of tags for the model,
            e.g. ["sentence-transformers", "cross-encoder"].
        local_files_only (`bool`): If True, don't attempt to find dataset or base model information on the Hub.
            Defaults to False.

    .. tip::

        Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
        include it in your model cards.

    Example::

        >>> model = CrossEncoder(
        ...     "microsoft/mpnet-base",
        ...     model_card_data=CrossEncoderModelCardData(
        ...         model_id="tomaarsen/ce-mpnet-base-allnli",
        ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         license="apache-2.0",
        ...         language="en",
        ...     ),
        ... )
    """

    # Potentially provided by the user
    task_name: str = field(default=None)
    tags: list[str] | None = field(
        default_factory=lambda: [
            "sentence-transformers",
            "cross-encoder",
            "reranker",
        ]
    )

    # Filled by `set_widget_examples`
    predict_example: list[list[str]] | None = field(default=None, init=False)

    # Set by `register_model` based on the registered model
    pipeline_tag: str = field(default=None, init=False)

    # Location of the Jinja template rendered by `generate_model_card`
    template_path: Path = field(default=Path(__file__).parent / "model_card_template.md", init=False, repr=False)

    # Set via `register_model` only
    model: CrossEncoder | None = field(default=None, init=False, repr=False)

    def set_widget_examples(self, dataset: Dataset | DatasetDict) -> None:
        """
        We don't set widget examples, but only load the prediction example.
        This is because the Hugging Face Hub doesn't currently have a Sentence Ranking
        or Text Classification widget that accepts pairs, which is what CrossEncoder
        models require.
        """
        if isinstance(dataset, DatasetDict):
            dataset = dataset[list(dataset.keys())[0]]

        if isinstance(dataset, (IterableDataset, IterableDatasetDict)):
            return

        if len(dataset) == 0:
            return

        # Only consider columns whose values are strings (or lists of strings)
        columns = [
            column
            for column, feature in dataset.features.items()
            if (isinstance(feature, Value) and feature.dtype in {"string", "large_string"})
            or (
                hasattr(feature, "feature")
                and isinstance(feature.feature, Value)
                and feature.feature.dtype in {"string", "large_string"}
            )
        ]
        if len(columns) < 2:
            return

        query_column = columns[0]
        answer_column = columns[1]
        query_type = type(dataset[0][query_column])
        answer_type = type(dataset[0][answer_column])
        queries = dataset[:5][query_column]
        answers = dataset[:5][answer_column]

        # If the answer column holds lists of candidates, pair the first query with its candidates instead
        if answer_type is list:
            answers = answers[0][:5]
            queries = [queries[0]] * len(answers)

        if query_type is str:
            self.predict_example = [[query, answer] for query, answer in zip(queries, answers)]

    def register_model(self, model: CrossEncoder) -> None:
        super().register_model(model)

        if self.task_name is None:
            self.task_name = (
                "text reranking and semantic search" if model.num_labels == 1 else "text pair classification"
            )
        if self.pipeline_tag is None:
            self.pipeline_tag = "text-ranking" if model.num_labels == 1 else "text-classification"

    def tokenize(self, text: str | list[str], **kwargs) -> dict[str, Any]:
        return self.model.tokenizer(text, **kwargs)

    def run_usage_snippet(self) -> None:
        # Intentionally a no-op: the usage snippet is not executed for CrossEncoder model cards.
        return

    def get_model_specific_metadata(self) -> dict[str, Any]:
        return {
            "model_max_length": self.model.max_length,
            "model_num_labels": self.model.num_labels,
        }


def generate_model_card(model: CrossEncoder) -> str:
    model_card = ModelCard.from_template(
        card_data=model.model_card_data,
        template_path=model.model_card_data.template_path,
        hf_emoji="🤗",
    )
    return model_card.content