
    rh                    Z    d dl mZ d dlmZ d dlmZ d dlmZmZ d dl	m
Z
  G d de      Zy)	    )annotations)Iterable)Tensor)TripletDistanceMetricTripletLoss)SparseEncoderc                  L     e Zd Zej                  df	 	 	 	 	 d fdZddZ xZS )SparseTripletLoss   c                *    t         |   |||       y)a^	  
        This class implements triplet loss. Given a triplet of (anchor, positive, negative),
        the loss minimizes the distance between anchor and positive while it maximizes the distance
        between anchor and negative. It compute the following loss function:

        ``loss = max(||anchor - positive|| - ||anchor - negative|| + margin, 0)``.

        Margin is an important hyperparameter and needs to be tuned respectively.

        Args:
            model: SparseEncoder
            distance_metric: Function to compute distance between two
                embeddings. The class TripletDistanceMetric contains
                common distance metrices that can be used.
            triplet_margin: The negative should be at least this much
                further away from the anchor than the positive.

        References:
            - For further details, see: https://en.wikipedia.org/wiki/Triplet_loss

        Requirements:
            1. Need to be used in SpladeLoss or CSRLoss as a loss function.
            2. (anchor, positive, negative) triplets

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive, negative) triplets | none   |
            +---------------------------------------+--------+

        Example:
            ::

                from datasets import Dataset

                from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

                model = SparseEncoder("distilbert/distilbert-base-uncased")
                train_dataset = Dataset.from_dict(
                    {
                        "anchor": ["It's nice weather outside today.", "He drove to work."],
                        "positive": ["It's so sunny.", "He took the car to the office."],
                        "negative": ["It's quite rainy, sadly.", "She walked to the store."],
                    }
                )
                loss = losses.SpladeLoss(
                    model=model, loss=losses.SparseTripletLoss(model), document_regularizer_weight=3e-5, query_regularizer_weight=5e-5
                )

                trainer = SparseEncoderTrainer(model=model, train_dataset=train_dataset, loss=loss)
                trainer.train()
        )distance_metrictriplet_marginN)super__init__)selfmodelr   r   	__class__s       /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/losses/SparseTripletLoss.pyr   zSparseTripletLoss.__init__   s    p 	P^_    c                    t        d      )NzNSparseTripletLoss should not be used alone. Use it with SpladeLoss or CSRLoss.)AttributeError)r   sentence_featureslabelss      r   forwardzSparseTripletLoss.forwardF   s    mnnr   )r   r   r   floatreturnNone)r   zIterable[dict[str, Tensor]]r   r   r   r   )__name__
__module____qualname__r   	EUCLIDEANr   r   __classcell__)r   s   @r   r
   r
      s7    4I4S4Smn8`"8`ej8`	8`tor   r
   N)
__future__r   collections.abcr   torchr   (sentence_transformers.losses.TripletLossr   r   2sentence_transformers.sparse_encoder.SparseEncoderr   r
    r   r   <module>r)      s"    " $  W L<o <or   