
from __future__ import annotations

from collections.abc import Iterable

import torch
import torch.nn as nn
import torch.nn.functional as F

from sentence_transformers.sparse_encoder.losses.SparseMultipleNegativesRankingLoss import (
    SparseMultipleNegativesRankingLoss,
)
from sentence_transformers.sparse_encoder.SparseEncoder import SparseEncoder


def normalized_mean_squared_error(reconstruction: torch.Tensor, original_input: torch.Tensor) -> torch.Tensor:
    """
    :param reconstruction: output of Autoencoder.decode (shape: [batch, n_inputs])
    :param original_input: input of Autoencoder.encode (shape: [batch, n_inputs])
    :return: normalized mean squared error (shape: [1])
    """
    original_input_mean = original_input.mean(dim=0)
    # Scale-free error: the reconstruction MSE divided by the MSE of the best constant
    # (per-dimension batch mean) baseline
    loss = F.mse_loss(reconstruction, original_input) / F.mse_loss(
        original_input_mean[None, :].broadcast_to(original_input.shape), original_input
    )
    return loss
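

# The helper above behaves like 1 - R^2: it is 0.0 for a perfect reconstruction and 1.0 when
# the reconstruction is no better than predicting the per-dimension batch mean. A hedged,
# self-contained sanity check (illustrative only, not part of the upstream API):
def _nmse_sanity_check() -> None:
    x = torch.randn(8, 4)
    # Perfect reconstruction: the numerator is zero
    assert normalized_mean_squared_error(x, x).item() == 0.0
    # Mean-predictor reconstruction: numerator equals denominator, so the score is exactly 1.0
    mean_recon = x.mean(dim=0)[None, :].broadcast_to(x.shape)
    assert torch.isclose(normalized_mean_squared_error(mean_recon, x), torch.tensor(1.0))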
D!G$11.2F2FG9 D K    c                  8     e Zd Zdd fdZddZddZd Z xZS )	CSRReconstructionLossc                >    t         |           || _        || _        y)a  
        CSRReconstructionLoss implements the reconstruction loss component for Contrastive Sparse Representation (CSR) models.

        This loss ensures that the sparse encoding can accurately reconstruct the original model embeddings through
        three components:

        1. A primary reconstruction loss (L_k) that measures the error between the original embedding and its
           reconstruction using the top-k sparse components.
        2. A secondary reconstruction loss (L_4k) that measures the error using the top-4k sparse components.
        3. An auxiliary loss (L_aux) that helps to learn residual information.

        Args:
            model: SparseEncoder model with autoencoder components
            beta: Weight for the auxiliary loss component (L_aux)

        References:
            - For more details, see the paper "Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation"
              https://arxiv.org/abs/2503.01776

        Requirements:
            1. The model must be configured to output the necessary reconstruction components
            2. Used with SparseEncoder models that implement compositional sparse autoencoding

        Relations:
            - Used as a component within :class:`CSRLoss` combined with a contrastive loss

        Example:
            - This loss is never used standalone, but instead used within the :class:`CSRLoss` class. See that loss for more details.
        N)super__init__modelbeta)selfr   r   	__class__s      r   r   zCSRReconstructionLoss.__init__   s    < 	
	r   c                    t        d      )Nz[CSRReconstructionLoss is not intended to be used standalone. Use it within CSRLoss instead.)NotImplementedError)r   sentence_featuress     r   forwardzCSRReconstructionLoss.forward?   s    !i
 	
r   c                h   d}d}d}|D ]s  }|d   }|d   }|d   }|d   }	|d   }
t        j                  ||      }t        j                  ||      }t        |	||
j                         z
        }||z  }||z  }||z  }u t	        |      }|dkD  r||z  }||z  }||z  }||dz  | j
                  |z  d	S )
a  
        Compute the CSRReconstruction loss from embeddings.

        Args:
            outputs: List of dictionaries containing sentence embeddings and their sparse representations

        Returns:
            A dictionary with the weighted reconstruction loss components:
            ``reconstruction_loss_k``, ``reconstruction_loss_4k``, and ``reconstruction_loss_aux``
        """
        total_L_k = 0.0
        total_L_4k = 0.0
        total_L_aux = 0.0

        for features in outputs:
            x = features["sentence_embedding_backbone"]
            recons_k = features["decoded_embedding_k"]
            recons_4k = features["decoded_embedding_4k"]
            recons_aux = features["decoded_embedding_aux"]
            reconsk_pre_bias = features["decoded_embedding_k_pre_bias"]

            # L(k): reconstruction error of the top-k sparse code
            L_k = F.mse_loss(x, recons_k)
            # L(4k): reconstruction error of the top-4k sparse code
            L_4k = F.mse_loss(x, recons_4k)
            # L(aux): normalized error of the auxiliary decoding against the (detached) residual
            L_aux = normalized_mean_squared_error(recons_aux, x - reconsk_pre_bias.detach())

            total_L_k += L_k
            total_L_4k += L_4k
            total_L_aux += L_aux

        # Average each term over the number of embedding columns (anchors, positives, negatives, ...)
        num_columns = len(outputs)
        if num_columns > 0:
            total_L_k /= num_columns
            total_L_4k /= num_columns
            total_L_aux /= num_columns

        # The 1/8 weight on the top-4k term follows the multi-granularity objective of the paper
        return {
            "reconstruction_loss_k": total_L_k,
            "reconstruction_loss_4k": total_L_4k / 8,
            "reconstruction_loss_aux": self.beta * total_L_aux,
        }

    def get_config_dict(self) -> dict[str, float]:
        """
        Get the configuration dictionary.

        Returns:
            Dictionary containing the configuration parameters
        """
        return {"beta": self.beta}


class CSRLoss(nn.Module):
    def __init__(
        self, model: SparseEncoder, loss: nn.Module | None = None, beta: float = 0.1, gamma: float = 1.0
    ) -> None:
        """
        CSRLoss implements a combined loss function for Contrastive Sparse Representation (CSR) models.

        This loss combines two components:

        1. A reconstruction loss :class:`CSRReconstructionLoss` that ensures the sparse representation can faithfully
            reconstruct the original embedding.
        2. A main loss, which in the paper is a :class:`SparseMultipleNegativesRankingLoss` that ensures semantically
            similar sentences have similar representations.

        The total loss is a linear combination of the two losses.

        Args:
            model: SparseEncoder model
            loss: The principal loss function. It can be any of the SparseEncoder losses except the FLOPS
                loss and the CSRReconstruction loss. If None, it defaults to :class:`SparseMultipleNegativesRankingLoss`.
            beta: Weight for the L_aux component in the reconstruction loss. Default is 0.1.
            gamma: Weight for the main loss component (MNRL a.k.a. InfoNCE by default). Default is 1.0.

        References:
            - For more details, see the paper "Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation"
              https://arxiv.org/abs/2503.01776

        Requirements:
            1. Input requirements depend on the chosen loss
            2. Uses autoencoder components of the SparseEncoder model

        Relations:
            - Uses :class:`CSRReconstructionLoss` for the reconstruction component

        Example:
            ::

                from datasets import Dataset
                from sentence_transformers.sparse_encoder import SparseEncoder, SparseEncoderTrainer, losses

                model = SparseEncoder("sentence-transformers/all-MiniLM-L6-v2")
                train_dataset = Dataset.from_dict(
                    {
                        "anchor": ["It's nice weather outside today.", "He drove to work."],
                        "positive": ["It's so sunny.", "He took the car to the office."],
                        "negative": ["It's quite rainy, sadly.", "She walked to the store."],
                    }
                )
                loss = losses.CSRLoss(model, beta=0.1, gamma=1.0)

                trainer = SparseEncoderTrainer(model=model, train_dataset=train_dataset, loss=loss)
                trainer.train()
        """
        super().__init__()
        self.model = model
        self.beta = beta
        self.gamma = gamma

        self.reconstruction_loss = CSRReconstructionLoss(model=model, beta=beta)
        self.loss = loss if loss is not None else SparseMultipleNegativesRankingLoss(model=model)

    def forward(
        self, sentence_features: Iterable[dict[str, torch.Tensor]], labels: torch.Tensor | None = None
    ) -> dict[str, torch.Tensor]:
        outputs = [self.model(sentence_feature) for sentence_feature in sentence_features]
        sentence_embedding = [output["sentence_embedding"] for output in outputs]

        # Reconstruction terms, already weighted inside CSRReconstructionLoss
        losses = self.reconstruction_loss.compute_loss_from_embeddings(outputs)

        # Main (contrastive) term, weighted by gamma
        base_loss = self.loss.compute_loss_from_embeddings(sentence_embedding, labels)
        if isinstance(base_loss, dict):
            for key, value in base_loss.items():
                losses[key] = value * self.gamma
        else:
            losses["base_loss"] = base_loss * self.gamma

        return losses

    def get_config_dict(self) -> dict[str, float]:
        """
        Get the configuration dictionary.

        Returns:
            Dictionary containing the configuration parameters
        """
        return {"beta": self.beta, "gamma": self.gamma, "loss": self.loss}

    @property
    def citation(self) -> str:
        return """
@misc{wen2025matryoshkarevisitingsparsecoding,
      title={Beyond Matryoshka: Revisiting Sparse Coding for Adaptive Representation},
      author={Tiansheng Wen and Yifei Wang and Zequn Zeng and Zhong Peng and Yudi Su and Xinyang Liu and Bo Chen and Hongwei Liu and Stefanie Jegelka and Chenyu You},
      year={2025},
      eprint={2503.01776},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2503.01776},
}
"""