
    rhV                        d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZmZmZ d dlmZmZmZ e	rd d	lmZ  ej0                  e      Z G d
 de      Ze G d de             Zy)    )annotationsN)	dataclassfield)Path)TYPE_CHECKINGAny)$SentenceTransformerModelCardCallback SentenceTransformerModelCardData)AsymModuleRouter)SparseAutoEncoderSparseStaticEmbeddingSpladePooling)SparseEncoderc                      e Zd Zy)SparseEncoderModelCardCallbackN)__name__
__module____qualname__     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/model_card.pyr   r      s    r   r   c                  2    e Zd ZU dZ ed      Zded<    ed       Zded	<    edd
      Zded<    edd
      Z	ded<    e e
e      j                  dz  d
d
      Zded<    edd
d
      Zded<    edd
d
      Zded<   d fdZddZddZ xZS )SparseEncoderModelCardDataa	  A dataclass storing data used in the model card.

    Args:
        language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
            e.g. "en" or ["en", "de", "nl"]
        license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
            or "cc-by-nc-sa-4.0"
        model_name (`Optional[str]`): The pretty name of the model, e.g. "SparseEncoder based on answerdotai/ModernBERT-base".
        model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
            e.g. "tomaarsen/se-mpnet-base-ms-marco".
        train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
        eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
        task_name (`str`): The human-readable task the model is trained on,
            e.g. "semantic search and sparse retrieval".
        tags (`Optional[List[str]]`): A list of tags for the model,
            e.g. ["sentence-transformers", "sparse-encoder"].
        local_files_only (`bool`): If True, don't attempt to find dataset or base model information on the Hub.Add commentMore actions
            Defaults to False.
        generate_widget_examples (`bool`): If True, generate widget examples from the evaluation or training dataset,
            and compute their similarities. Defaults to True.

    .. tip::

        Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
        include it in your model cards.

    Example::

        >>> model = SparseEncoder(
        ...     "microsoft/mpnet-base",
        ...     model_card_data=SparseEncoderModelCardData(
        ...         model_id="tomaarsen/se-mpnet-base-allnli",
        ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         license="apache-2.0",
        ...         language="en",
        ...     ),
        ... )
    N)defaultstr	task_namec                 
    g dS )N)zsentence-transformerszsparse-encodersparser   r   r   r   <lambda>z#SparseEncoderModelCardData.<lambda>E   s	     !
 r   )default_factoryzlist[str] | NonetagsF)r   initzlist[list[str]] | Nonepredict_examplepipeline_tagzmodel_card_template.md)r   r$   reprr   template_pathSparse Encoder
model_typezSparseEncoder | Nonemodelc                   t         |   |       | j                  d| _        | j                  d| _        |j	                         D cg c]  }t        |t              s|j                  ! }}g }t        |v st        |v r|dgz  }t        |v r|dgz  }t        |v r|dgz  }t        |v r|dgz  }| j                  t        t        j                   |             |dgz  }dj#                  |      | _        y c c}w )	Nz$semantic search and sparse retrievalzfeature-extraction
AsymmetriczInference-freeSPLADECSRr)    )superregister_modelr   r&   modules
isinstancer   	__class__r   r   r   r   r   add_tagsmapr   lowerjoinr*   )selfr+   moduleall_modulesr*   r5   s        r   r2   z)SparseEncoderModelCardData.register_modelW   s    u%>>!CDN$ 4D6;mmodFTZ\bIcv''dd
;&K"7<.(J K/+,,JK'8*$J+5'!Jc#))Z01'((
((:.! es   	C7C7c           	        d}| j                   j                  r]dddddj                  | j                   j                  | j                   j                  j                  dd      j	                               }| j                   j                         | j                   j                         t        | j                         |t        | j                   dd       d	S )
NzDot ProductzCosine SimilarityzEuclidean DistancezManhattan Distance)cosinedot	euclidean	manhattan_r0   max_active_dims)model_max_lengthoutput_dimensionalitymodel_stringsimilarity_fn_namerC   )	r+   rG   getreplacetitleget_max_seq_length get_sentence_embedding_dimensionr   getattr)r:   rG   s     r   get_model_specific_metadataz6SparseEncoderModelCardData.get_model_specific_metadataq   s    *::((-$11	"
 c$**//1N1N1V1VWZ\_1`1f1f1hi  !%

 = = ?%)ZZ%P%P%R

O"4&tzz3DdK
 	
r   c                    | j                   S )N)r*   )r:   s    r   get_default_model_namez1SparseEncoderModelCardData.get_default_model_name   s    r   )r+   r   returnNone)rQ   zdict[str, Any])rQ   rR   )r   r   r   __doc__r   r   __annotations__r#   r%   r&   r   __file__parentr(   r*   r+   r2   rN   rP   __classcell__)r5   s   @r   r   r      s    (V 4(Is("
D
  /4Du.MO+M d7L#7X(=(=@X(X_dkpqM4q$45uMJM #(5u"MEM/4
"r   r   )
__future__r   loggingdataclassesr   r   pathlibr   typingr   r    sentence_transformers.model_cardr	   r
   sentence_transformers.modelsr   r   r   +sentence_transformers.sparse_encoder.modelsr   r   r   2sentence_transformers.sparse_encoder.SparseEncoderr   	getLoggerr   loggerr   r   r   r   r   <module>rc      sg    "  (  % s = = o oP			8	$	%I 	 l!A l lr   