from __future__ import annotations

import torch
from torch import Tensor, nn

from sentence_transformers.cross_encoder.CrossEncoder import CrossEncoder
from sentence_transformers.util import fullname


class MarginMSELoss(nn.Module):
    def __init__(self, model: CrossEncoder, activation_fn: nn.Module = nn.Identity(), **kwargs) -> None:
        """
        Computes the MSE loss between ``|sim(Query, Pos) - sim(Query, Neg)|`` and ``|gold_sim(Query, Pos) - gold_sim(Query, Neg)|``.
        This loss is often used to distill a cross-encoder model from a teacher cross-encoder model or gold labels.

        In contrast to :class:`~sentence_transformers.cross_encoder.losses.MultipleNegativesRankingLoss`, the two passages do not
        have to be strictly positive and negative, both can be relevant or not relevant for a given query. This can be
        an advantage of MarginMSELoss over MultipleNegativesRankingLoss.
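
        Concretely, with ``sim`` denoting the student's (optionally activated) logit, the loss for each
        (query, positive, negative) triplet is::

            MSE(sim(query, positive) - sim(query, negative), gold_sim(query, positive) - gold_sim(query, negative))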

        .. note::

            Be mindful of the magnitude of both the labels and what the model produces. If the teacher model
            passes its logits through a Sigmoid to bound them to [0, 1], then you may wish to use a Sigmoid
            activation function in this loss as well.
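
            For example, a minimal sketch (``student_model`` as in the example below; whether Sigmoid is
            appropriate depends on how your teacher's scores were produced)::

                loss = losses.MarginMSELoss(student_model, activation_fn=nn.Sigmoid())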

        Args:
            model (:class:`~sentence_transformers.cross_encoder.CrossEncoder`): A CrossEncoder model to be trained.
            activation_fn (:class:`~torch.nn.Module`): Activation function applied to the logits before computing the loss.
            **kwargs: Additional keyword arguments passed to the underlying :class:`torch.nn.MSELoss`.

        References:
            - Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation: https://arxiv.org/abs/2010.02666
            - `Cross Encoder > Training Examples > Distillation <../../../examples/cross_encoder/training/distillation/README.html>`_

        Requirements:
            1. Your model must be initialized with `num_labels = 1` (the default) to predict one class.
            2. Usually used with a finetuned CrossEncoder teacher model in a knowledge distillation setup.

        Inputs:
            +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
            | Texts                                          | Labels                                                                                     | Number of Model Output Labels |
            +================================================+============================================================================================+===============================+
            | (query, passage_one, passage_two) triplets     | gold_sim(query, passage_one) - gold_sim(query, passage_two)                                | 1                             |
            +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
            | (query, passage_one, passage_two) triplets     | [gold_sim(query, passage_one), gold_sim(query, passage_two)]                               | 1                             |
            +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
            | (query, positive, negative_1, ..., negative_n) | [gold_sim(query, positive) - gold_sim(query, negative_i) for i in 1..n]                    | 1                             |
            +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
            | (query, positive, negative_1, ..., negative_n) | [gold_sim(query, positive), gold_sim(query, negative_1), ..., gold_sim(query, negative_n)] | 1                             |
            +------------------------------------------------+--------------------------------------------------------------------------------------------+-------------------------------+
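
            For example, with two negatives per query, either of these label layouts is accepted for a
            single sample (hypothetical scores)::

                {"label": [0.7, 0.4]}       # precomputed margins, one per negative
                {"label": [0.9, 0.2, 0.5]}  # raw scores: [positive, negative_1, negative_2]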

        Relations:
            - :class:`MSELoss` is similar to this loss, but it regresses the score of a single (query, passage)
              pair directly, without a margin over a negative pair.

        Example:
            ::

                from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainer, losses
                from datasets import Dataset

                student_model = CrossEncoder("microsoft/mpnet-base")
                teacher_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L12-v2")
                train_dataset = Dataset.from_dict({
                    "query": ["What are pandas?", "What is the capital of France?"],
                    "positive": ["Pandas are a kind of bear.", "The capital of France is Paris."],
                    "negative": ["Pandas are a kind of fish.", "The capital of France is Berlin."],
                })

                def compute_labels(batch):
                    positive_scores = teacher_model.predict(list(zip(batch["query"], batch["positive"])))
                    negative_scores = teacher_model.predict(list(zip(batch["query"], batch["negative"])))
                    return {
                        "label": positive_scores - negative_scores
                    }

                train_dataset = train_dataset.map(compute_labels, batched=True)
                loss = losses.MarginMSELoss(student_model)

                trainer = CrossEncoderTrainer(
                    model=student_model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()
        """
        super().__init__()
        self.model = model
        self.activation_fn = activation_fn
        self.loss_fct = nn.MSELoss(**kwargs)

        if not isinstance(self.model, CrossEncoder):
            raise ValueError(
                f"{self.__class__.__name__} expects a model of type CrossEncoder, "
                f"but got a model of type {type(self.model)}."
            )

        if self.model.num_labels != 1:
            raise ValueError(
                f"{self.__class__.__name__} expects a model with 1 output label, "
                f"but got a model with {self.model.num_labels} output labels."
            )

    def forward(self, inputs: list[list[str]], labels: Tensor | list[Tensor]) -> Tensor:
        anchors = inputs[0]
        positives = inputs[1]
        negatives = inputs[2:]
        batch_size = len(anchors)

        if isinstance(labels, list):
            labels = torch.stack(labels)

        # Labels may also be raw scores [positive, negative_1, ..., negative_n];
        # convert them into margins: positive - negative_i for each negative.
        if labels.shape == (batch_size, len(negatives) + 1):
            labels = labels[:, 0].unsqueeze(1) - labels[:, 1:]
        if labels.shape == (batch_size,):
            labels = labels.unsqueeze(1)
        if labels.shape != (batch_size, len(negatives)):
            raise ValueError(
                f"Labels shape {labels.shape} does not match expected shape {(batch_size, len(negatives))}. "
                "Ensure that your dataset labels/scores are 1) lists of differences between positive scores and "
                "negative scores (length `num_negatives`), or 2) lists of positive and negative scores "
                "(length `num_negatives + 1`)."
            )

        positive_pairs = list(zip(anchors, positives))
        positive_logits = self.logits_from_pairs(positive_pairs)

        negative_logits_list = []
        for negative in negatives:
            negative_pairs = list(zip(anchors, negative))
            negative_logits_list.append(self.logits_from_pairs(negative_pairs))

        # Predicted margin between the positive pair and each negative pair,
        # regressed against the gold margins via MSE.
        margin_logits = positive_logits.unsqueeze(1) - torch.stack(negative_logits_list, dim=1)
        loss = self.loss_fct(margin_logits, labels.float())
        return loss

    def logits_from_pairs(self, pairs: list[tuple[str, str]]) -> Tensor:
        """
        Computes the logits for a list of pairs using the model.

        Args:
            pairs (list[tuple[str, str]]): A list of pairs of strings (query, passage).

        Returns:
            Tensor: The logits for the pairs.
        """
        tokens = self.model.tokenizer(
            pairs,
            padding=True,
            truncation=True,
            return_tensors="pt",
        )
        tokens.to(self.model.device)
        logits = self.model(**tokens)[0].view(-1)
        return self.activation_fn(logits)

    def get_config_dict(self):
        return {
            "activation_fn": fullname(self.activation_fn),
        }

    @property
    def citation(self) -> str:
        return """
@misc{hofstätter2021improving,
    title={Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation},
    author={Sebastian Hofstätter and Sophia Althammer and Michael Schröder and Mete Sertkan and Allan Hanbury},
    year={2021},
    eprint={2010.02666},
    archivePrefix={arXiv},
    primaryClass={cs.IR}
}
"""