
    rh                    b    d dl mZ d dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ  G d de	      Zy	)
    )annotations)Iterable)Tensor)util)
CoSENTLoss)SparseEncoderc                  B     e Zd Zdej                  fd fdZddZ xZS )SparseCoSENTLossg      4@c                6    d|_         t        | 	  |||      S )a
  
        This class implements CoSENT (Cosine Sentence).
        It expects that each of the InputExamples consists of a pair of texts and a float valued label, representing
        the expected similarity score between the pair.

        It computes the following loss function:

        ``loss = logsum(1+exp(s(i,j)-s(k,l))+exp...)``, where ``(i,j)`` and ``(k,l)`` are any of the input pairs in the
        batch such that the expected similarity of ``(i,j)`` is greater than ``(k,l)``. The summation is over all possible
        pairs of input pairs in the batch that match this condition.

        Args:
            model: SparseEncoder
            similarity_fct: Function to compute the PAIRWISE similarity
                between embeddings. Default is
                ``util.pairwise_cos_sim``.
            scale: Output of similarity function is multiplied by scale
                value. Represents the inverse temperature.

        References:
            - For further details, see: https://kexue.fm/archives/8847

        Requirements:
            - Need to be used in SpladeLoss or CSRLoss as a loss function.
            - Sentence pairs with corresponding similarity scores in range of the similarity function. Default is [-1,1].

        Inputs:
            +--------------------------------+------------------------+
            | Texts                          | Labels                 |
            +================================+========================+
            | (sentence_A, sentence_B) pairs | float similarity score |
            +--------------------------------+------------------------+

        Relations:
            - :class:`SparseAnglELoss` is SparseCoSENTLoss with ``pairwise_angle_sim`` as the metric, rather than ``pairwise_cos_sim``.

        Example:
            ::

                from datasets import Dataset

                from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

                model = SparseEncoder("distilbert/distilbert-base-uncased")
                train_dataset = Dataset.from_dict(
                    {
                        "sentence1": ["It's nice weather outside today.", "He drove to work."],
                        "sentence2": ["It's so sunny.", "She walked to the store."],
                        "score": [1.0, 0.3],
                    }
                )
                loss = losses.SpladeLoss(
                    model=model, loss=losses.SparseCoSENTLoss(model), document_regularizer_weight=5e-5, use_document_regularizer_only=True
                )

                trainer = SparseEncoderTrainer(model=model, train_dataset=train_dataset, loss=loss)
                trainer.train()
        cosine)scalesimilarity_fct)similarity_fn_namesuper__init__)selfmodelr   r   	__class__s       /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/losses/SparseCoSENTLoss.pyr   zSparseCoSENTLoss.__init__   s%    v $, wU>RR    c                    t        d      )NzMSparseCoSENTLoss should not be used alone. Use it with SpladeLoss or CSRLoss.)AttributeError)r   sentence_featureslabelss      r   forwardzSparseCoSENTLoss.forwardK   s    lmmr   )r   r   r   floatreturnNone)r   zIterable[dict[str, Tensor]]r   r   r   r   )__name__
__module____qualname__r   cos_simr   r   __classcell__)r   s   @r   r
   r
      s    <@QUQ]Q] <S|nr   r
   N)
__future__r   collections.abcr   torchr   sentence_transformersr   'sentence_transformers.losses.CoSENTLossr   2sentence_transformers.sparse_encoder.SparseEncoderr   r
    r   r   <module>r+      s'    " $  & > L@nz @nr   