
from __future__ import annotations

import logging
from collections.abc import Iterable

import torch
import torch.nn as nn

from sentence_transformers.sparse_encoder.losses import FlopsLoss
from sentence_transformers.sparse_encoder.SparseEncoder import SparseEncoder

logger = logging.getLogger(__name__)


class SpladeLoss(nn.Module):
    def __init__(
        self,
        model: SparseEncoder,
        loss: nn.Module,
        document_regularizer_weight: float,
        query_regularizer_weight: float | None = None,
        document_regularizer: nn.Module | None = None,
        query_regularizer: nn.Module | None = None,
        document_regularizer_threshold: int | None = None,
        query_regularizer_threshold: int | None = None,
        use_document_regularizer_only: bool = False,
    ):
        """
        SpladeLoss implements the loss function for the SPLADE (Sparse Lexical and Expansion) model,
        which combines a main loss function with regularization terms to control efficiency.

        This loss function balances effectiveness (via the main loss) with efficiency by regularizing
        both the query and document representations to be sparse, reducing computational requirements
        at inference time.

        Args:
            model: SparseEncoder model
            loss: The principal loss function to use; it can be any of the SparseEncoder losses except the CSR-related losses and FlopsLoss.
            document_regularizer_weight: Weight for the corpus regularization term. This term encourages sparsity in the document embeddings.
                Will be applied to positive documents and to all negative ones if any are provided. In some papers, this parameter is
                referred to as "lambda_d" (document) or "lambda_c" (corpus).
            query_regularizer_weight: Weight for the query regularization term. This term encourages sparsity in the query embeddings.
                If None, no query regularization will be applied. This is fine in an inference-free setup or when
                use_document_regularizer_only=True; otherwise you should set a query_regularizer_weight > 0.
                In some papers, this parameter is referred to as "lambda_q" (query).
            document_regularizer: Optional regularizer to use specifically for corpus regularization instead of the default FlopsLoss.
                This allows for different regularization strategies for documents vs queries.
            query_regularizer: Optional regularizer to use specifically for query regularization instead of the default FlopsLoss.
                This allows for different regularization strategies for queries vs documents.
            document_regularizer_threshold: Optional threshold for the number of non-zero (active) elements in the corpus embeddings to be considered in the FlopsLoss.
                If specified, only corpus embeddings with more than this number of non-zero (active) elements will be considered.
                Only used when document_regularizer is None (for the default FlopsLoss).
            query_regularizer_threshold: Optional threshold for the number of non-zero (active) elements in the query embeddings to be considered in the FlopsLoss.
                If specified, only query embeddings with more than this number of non-zero (active) elements will be considered.
                Only used when query_regularizer is None (for the default FlopsLoss).
            use_document_regularizer_only: If True, all input embeddings are treated as documents and regularized together with document_regularizer_weight.
                Especially useful when training with symmetric texts (e.g. pairs of documents) or with more than two document columns.

        References:
            - For more details, see the paper "From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective"
              https://arxiv.org/abs/2205.04733

        Requirements:
            1. Input requirements depend on the chosen loss
            2. Usually used with a teacher model in a knowledge distillation setup and an associated loss

        Example:
            ::

                from datasets import Dataset

                from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

                student_model = SparseEncoder("distilbert/distilbert-base-uncased")
                teacher_model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
                train_dataset = Dataset.from_dict(
                    {
                        "query": ["It's nice weather outside today.", "He drove to work."],
                        "passage1": ["It's so sunny.", "He took the car to work."],
                        "passage2": ["It's very sunny.", "She walked to the store."],
                    }
                )

                def compute_labels(batch):
                    emb_queries = teacher_model.encode(batch["query"])
                    emb_passages1 = teacher_model.encode(batch["passage1"])
                    emb_passages2 = teacher_model.encode(batch["passage2"])
                    return {
                        "label": teacher_model.similarity_pairwise(emb_queries, emb_passages1)
                        - teacher_model.similarity_pairwise(emb_queries, emb_passages2)
                    }

                train_dataset = train_dataset.map(compute_labels, batched=True)
                loss = losses.SpladeLoss(
                    student_model,
                    loss=losses.SparseMarginMSELoss(student_model),
                    document_regularizer_weight=3e-5,
                    query_regularizer_weight=5e-5,
                )

                trainer = SparseEncoderTrainer(model=student_model, train_dataset=train_dataset, loss=loss)
                trainer.train()
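
            A minimal sketch of a symmetric-text setup, in which every input column is regularized as a document via
            use_document_regularizer_only=True; it assumes SparseMultipleNegativesRankingLoss as the main loss, since
            no teacher labels are needed in that case::

                pair_dataset = Dataset.from_dict(
                    {
                        "document1": ["It's nice weather outside today.", "He drove to work."],
                        "document2": ["It's so sunny.", "He took the car to work."],
                    }
                )
                # Both columns are treated as documents, so only the document regularizer is applied
                pair_loss = losses.SpladeLoss(
                    student_model,
                    loss=losses.SparseMultipleNegativesRankingLoss(student_model),
                    document_regularizer_weight=3e-5,
                    use_document_regularizer_only=True,
                )
                pair_trainer = SparseEncoderTrainer(model=student_model, train_dataset=pair_dataset, loss=pair_loss)
                pair_trainer.train()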
        """
        super().__init__()
        self.model = model
        self.loss = loss
        self.document_regularizer_weight = document_regularizer_weight
        self.query_regularizer_weight = query_regularizer_weight
        self.use_document_regularizer_only = use_document_regularizer_only
        # Default to FlopsLoss for the document (corpus) regularizer unless a custom one is provided
        self.document_regularizer = (
            document_regularizer
            if document_regularizer is not None
            else FlopsLoss(model, threshold=document_regularizer_threshold)
        )
        # Only set up a query regularizer when queries are regularized separately from documents
        if query_regularizer is not None:
            self.query_regularizer = query_regularizer
        elif not use_document_regularizer_only:
            self.query_regularizer = FlopsLoss(model, threshold=query_regularizer_threshold)

        if self.query_regularizer_weight is None and not use_document_regularizer_only:
            logging.warning(
                "query_regularizer_weight is None. This means that the query regularization will not be applied. "
                "If you are in an inference-free setup this is fine; otherwise you should set a query_regularizer_weight > 0."
            )
        if use_document_regularizer_only and self.query_regularizer_weight is not None:
            logging.warning(
                "query_regularizer_weight should be None when use_document_regularizer_only is True. "
                "use_document_regularizer_only means that all inputs are considered to be of the same type and are "
                "regularized together, so query_regularizer_weight will be ignored."
            )
            self.query_regularizer_weight = None
        if not hasattr(loss, "compute_loss_from_embeddings"):
            raise ValueError(
                "The provided loss does not have a 'compute_loss_from_embeddings' method, which is required for "
                "SpladeLoss. This method must have the signature "
                "`compute_loss_from_embeddings(embeddings: List[Tensor], labels: Tensor | None = None)`."
            )

    def forward(
        self, sentence_features: Iterable[dict[str, torch.Tensor]], labels: torch.Tensor | None = None
    ) -> dict[str, torch.Tensor]:
        # Compute sparse embeddings for every input column (by convention the query column comes first)
        embeddings = [self.model(sentence_feature)["sentence_embedding"] for sentence_feature in sentence_features]

        losses = {}
        # Main (effectiveness) loss, computed directly from the precomputed embeddings
        base_loss = self.loss.compute_loss_from_embeddings(embeddings, labels)
        if isinstance(base_loss, dict):
            losses.update(base_loss)
        else:
            losses["base_loss"] = base_loss

        # Document (corpus) regularization: over all embeddings, or over everything except the query column
        if self.use_document_regularizer_only:
            corpus_loss = self.document_regularizer.compute_loss_from_embeddings(torch.cat(embeddings))
        else:
            corpus_loss = self.document_regularizer.compute_loss_from_embeddings(torch.cat(embeddings[1:]))
        losses["document_regularizer_loss"] = corpus_loss * self.document_regularizer_weight

        # Query regularization, only when a weight was provided
        if self.query_regularizer_weight is not None:
            query_loss = self.query_regularizer.compute_loss_from_embeddings(embeddings[0])
            losses["query_regularizer_loss"] = query_loss * self.query_regularizer_weight

        return losses

    def get_config_dict(self):
        """
        Get the configuration dictionary.

        Returns:
            Dictionary containing the configuration parameters
        """
        config_dict = {
            "loss": self.loss,
            "document_regularizer_weight": self.document_regularizer_weight,
        }
        if self.query_regularizer_weight is not None:
            config_dict["query_regularizer_weight"] = self.query_regularizer_weight

        # Report a custom document regularizer by class name, or the threshold used by the default FlopsLoss
        if not isinstance(self.document_regularizer, FlopsLoss):
            config_dict["document_regularizer"] = self.document_regularizer.__class__.__name__
        if hasattr(self.document_regularizer, "threshold") and self.document_regularizer.threshold is not None:
            config_dict["document_regularizer_threshold"] = self.document_regularizer.threshold

        if hasattr(self, "query_regularizer") and self.query_regularizer is not None:
            if not isinstance(self.query_regularizer, FlopsLoss):
                config_dict["query_regularizer"] = self.query_regularizer.__class__.__name__
            if hasattr(self.query_regularizer, "threshold") and self.query_regularizer.threshold is not None:
                config_dict["query_regularizer_threshold"] = self.query_regularizer.threshold

        return config_dict

    @property
    def citation(self) -> str:
        return """
@misc{formal2022distillationhardnegativesampling,
      title={From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective},
      author={Thibault Formal and Carlos Lassance and Benjamin Piwowarski and Stéphane Clinchant},
      year={2022},
      eprint={2205.04733},
      archivePrefix={arXiv},
      primaryClass={cs.IR},
      url={https://arxiv.org/abs/2205.04733},
}
"""