
    rhv`                    `   d dl mZ d dlZd dlZd dlmZmZ d dlZd dlm	Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*  e)       rd dl+m,Z,m-Z-m.Z.m/Z/  ej`                  e1      Z2 G d de'      Z3y)    )annotationsN)AnyCallable)parse)nn)EvalPredictionPreTrainedTokenizerBaseTrainerCallback)__version__)WandbCallback)SentenceEvaluatorSequentialEvaluator)Router)(SpladeRegularizerWeightSchedulerCallback)SparseEncoderDataCollator)"SparseMultipleNegativesRankingLoss
SpladeLoss)SparseEncoderModelCardCallbackSparseEncoder)SparseEncoderTrainingArguments)SentenceTransformerTrainer)is_datasets_availableis_training_available)DatasetDatasetDictIterableDatasetValuec                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZddZdd fdZddZ	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 d fdZ		 d	 	 	 	 	 d fdZ
 xZS )SparseEncoderTraineru@  
    SparseEncoderTrainer is a simple but feature-complete training and eval loop for PyTorch
    based on the SentenceTransformerTrainer that based on 🤗 Transformers :class:`~transformers.Trainer`.

    This trainer integrates support for various :class:`transformers.TrainerCallback` subclasses, such as:

    - :class:`~transformers.integrations.WandbCallback` to automatically log training metrics to W&B if `wandb` is installed
    - :class:`~transformers.integrations.TensorBoardCallback` to log training metrics to TensorBoard if `tensorboard` is accessible.
    - :class:`~transformers.integrations.CodeCarbonCallback` to track the carbon emissions of your model during training if `codecarbon` is installed.

        - Note: These carbon emissions will be included in your automatically generated model card.

    See the Transformers `Callbacks <https://huggingface.co/docs/transformers/main/en/main_classes/callback>`_
    documentation for more information on the integrated callbacks and how to write your own callbacks.

    Args:
        model (:class:`~sentence_transformers.SparseEncoder`, *optional*):
            The model to train, evaluate or use for predictions. If not provided, a `model_init` must be passed.
        args (:class:`~sentence_transformers.sparse_encoder.training_args.SparseEncoderTrainingArguments`, *optional*):
            The arguments to tweak for training. Will default to a basic instance of
            :class:`~sentence_transformers.sparse_encoder.training_args.SparseEncoderTrainingArguments` with the
            `output_dir` set to a directory named *tmp_trainer* in the current directory if not provided.
        train_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, :class:`datasets.IterableDataset`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for training. Must have a format accepted by your loss function, see
            `Training Overview > Dataset Format <../../../docs/sentence_transformer/training_overview.html#dataset-format>`_.
        eval_dataset (Union[:class:`datasets.Dataset`, :class:`datasets.DatasetDict`, :class:`datasets.IterableDataset`, Dict[str, :class:`datasets.Dataset`]], *optional*):
            The dataset to use for evaluation. Must have a format accepted by your loss function, see
            `Training Overview > Dataset Format <../../../docs/sentence_transformer/training_overview.html#dataset-format>`_.
        loss (Optional[Union[:class:`torch.nn.Module`, Dict[str, :class:`torch.nn.Module`],            Callable[[:class:`~sentence_transformers.SparseEncoder`], :class:`torch.nn.Module`],            Dict[str, Callable[[:class:`~sentence_transformers.SparseEncoder`]]]], *optional*):
            The loss function to use for training. Can either be a loss class instance, a dictionary mapping
            dataset names to loss class instances, a function that returns a loss class instance given a model,
            or a dictionary mapping dataset names to functions that return a loss class instance given a model.
            In practice, the latter two are primarily used for hyper-parameter optimization. Will default to
            :class:`~sentence_transformers.sparse_encoder.losses.SparseMultipleNegativesRankingLoss` if no ``loss`` is provided.
        evaluator (Union[:class:`~sentence_transformers.evaluation.SentenceEvaluator`,            List[:class:`~sentence_transformers.evaluation.SentenceEvaluator`]], *optional*):
            The evaluator instance for useful evaluation metrics during training. You can use an ``evaluator`` with
            or without an ``eval_dataset``, and vice versa. Generally, the metrics that an ``evaluator`` returns
            are more useful than the loss value returned from the ``eval_dataset``. A list of evaluators will be
            wrapped in a :class:`~sentence_transformers.evaluation.SequentialEvaluator` to run them sequentially.
        callbacks (List of [:class:`transformers.TrainerCallback`], *optional*):
            A list of callbacks to customize the training loop. Will add those to the list of default callbacks
            detailed in [here](callback).

            If you want to remove one of the default callbacks used, use the [`Trainer.remove_callback`] method.
        optimizers (`Tuple[:class:`torch.optim.Optimizer`, :class:`torch.optim.lr_scheduler.LambdaLR`]`, *optional*, defaults to `(None, None)`):
            A tuple containing the optimizer and the scheduler to use. Will default to an instance of :class:`torch.optim.AdamW`
            on your model and a scheduler given by :func:`transformers.get_linear_schedule_with_warmup` controlled by `args`.

    Important attributes:

        - **model** -- Always points to the core model. If using a transformers model, it will be a [`PreTrainedModel`]
          subclass.
        - **model_wrapped** -- Always points to the most external model in case one or more other modules wrap the
          original model. This is the model that should be used for the forward pass. For example, under `DeepSpeed`,
          the inner model is wrapped in `DeepSpeed` and then again in `torch.nn.DistributedDataParallel`. If the inner
          model hasn't been wrapped, then `self.model_wrapped` is the same as `self.model`.
        - **is_model_parallel** -- Whether or not a model has been switched to a model parallel mode (different from
          data parallelism, this means some of the model layers are split on different GPUs).
        - **place_model_on_device** -- Whether or not to automatically place the model on the device - it will be set
          to `False` if model parallel or deepspeed is used, or if the default
          `TrainingArguments.place_model_on_device` is overridden to return `False` .
        - **is_in_train** -- Whether or not a model is currently running `train` (e.g. when `evaluate` is called while
          in `train`)
    c                h   t               st        d      |(d}t        j                  d| d       t	        |      }nt        |t              st        d      |%|	|	| _        | j                         }n)t        d      |	t        j                  d       |	| _        |
t        j                  d	       t	        d
      j                         }|j                  r;|j                  j                  s%|j                  j                  |j                         |2t        |d      r&t        |j                   t"              r|j                   }|t%        |j&                  |j(                  |j*                  t        |d      rt-        |j.                        n	t-                     }t0        |j3                         D cg c]  }|j4                   c}v r|j(                  st        d      t7        ddg||g      D ]  \  }}t        |t8              s|j:                  $t=        t?        |            }t@        dtB        dtD        dtF        di}|jI                         D ci c]*  \  }}|tK        |jM                  tO        |      d            , }}}t        d| d| d| d| d	       t        |tP              rt        |tR              stS        |      }t        |tP              rt        |tR              stS        |      }| j                  rd n|||||||nd|	|
|||d
}tU        tV              tU        d      k\  r||d<   n||d<   |*|(|jX                  dk7  rt        d |jX                   d!      t[        t\        |   d7i | | j`                  dk(  rd | _0        i i d"| _1        d#| _2        t        | jf                  d$      r| ji                         | jf                  _5        |  |  |  tm        | jn                  jp                  D cg c]  }t        |tr               c}      r tt        jv                  jy                  d%d&       |.t        j                  d'       t{        |t}        |(      d)d*+      }t        |tP              r|jI                         D ci c]  \  }}|| j                  ||       c}}| _@        t7        ddg||g      D ]  \  }}|	t        |tP              st        d,| d-      t-        |j                               t-        |j                               z
  x}s^t        d.| d/t        |       d0t        |      d1k(  rd2nd3 d4| d5	       n| j                  ||      | _@        |t        |t              st        |      }|| _F        | j                  .| j                  ||j*                  |j(                  d6      | _G        | j`                  .| j                  ||j*                  |j(                  d6      | _0        | j                  |       y c c}w c c}}w c c}w c c}}w )8NzTo train a SparseEncoder model, you need to install the `accelerate` and `datasets` modules. You can do so with the `train` extra:
pip install -U "sentence-transformers[train]"tmp_trainerz>No `SparseEncoderTrainingArguments` passed, using `output_dir=z`.)
output_dirzRPlease use `SparseEncoderTrainingArguments` imported from `sentence_transformers`.z<`Trainer` requires either a `model` or `model_init` argumentz`Trainer` requires either a `model` or `model_init` argument, but not both. `model_init` will overwrite your model when calling the `train` method.z`compute_metrics` is currently not compatible with the SparseEncoderTrainer. Please use the `evaluator` argument instead for detailed evaluation metrics, or the `eval_dataset` argument for the evaluation loss.unused	tokenizerall_special_ids)tokenize_fnrouter_mappingpromptsr&   al  You are using a Router module in your model, but you did not provide a `router_mapping` in the training arguments. This means that the Router module will not be able to route the inputs to the correct submodules. Please provide a `router_mapping` that maps column names to routes, e.g. {'column_one': 'query', 'column_two': 'document', 'column_three': 'document'}.trainevalstringint64float32boolnullzThe provided `z6_dataset` must have Features. Specify them with e.g.:
z_dataset = z_dataset.cast(Features(z))
or by providing the Features to the IterableDataset initialization method. See the Datasets documentation for more information on dataset Features: https://huggingface.co/docs/datasets/en/about_dataset_featuresdummy)
modelargsdata_collatortrain_dataseteval_dataset
model_initcompute_metrics	callbacks
optimizerspreprocess_logits_for_metricsz4.46.0processing_classnoz%You have set `args.eval_strategy` to z, but you didn't provide an `eval_dataset` or an `evaluator`. Either provide an `eval_dataset` or an `evaluator` to `SparseEncoderTrainer`, or set `args.eval_strategy='no'` to skip evaluation.)r*   r+   Tinclude_prompt_lengthsWANDB_PROJECTzsentence-transformersaR  No `loss` passed, using `sentence_transformers.sparse_encoder.losses.SpladeLoss` as a default option. with `SparseMultipleNegativesRankingLoss` as the default loss function.Be careful, we also set the `query_regularizer_weight` and `document_regularizer_weight`, but this are really sensitive parameters and should be tuned for your task.)r2   g-C6
?giUMu>)r2   lossquery_regularizer_weightdocument_regularizer_weightz,If the provided `loss` is a dict, then the `z"_dataset` must be a `DatasetDict`.z:If the provided `loss` is a dict, then all keys from the `z;_dataset` dictionary must occur in `loss` also. Currently, z occur   s z in `z_dataset` but not in `loss`.)r)   r(   dataset_name )Jr   RuntimeErrorloggerinfor   
isinstance
ValueErrorr7   call_model_initwarningto_dicthub_model_idmodel_card_datamodel_idset_model_idhasattrr%   r	   r   tokenizer(   r)   setr&   r   children	__class__zipr   column_namesnextiterstrintfloatr/   itemsr   gettypedictr   parse_versiontransformers_versioneval_strategysuperr   __init__r6   accum_loss_componentscan_return_lossr4   _include_prompt_lengthr>   anycallback_handlerr9   r   osenviron
setdefaultr   r   prepare_lossr@   keyssortedlenr   r   	evaluatorr5   preprocess_datasetadd_model_card_callback)selfr2   r3   r5   r6   r@   ru   r4   r%   r7   r8   r9   r:   r;   r#   default_args_dictmodulerF   datasetsamplenaive_type_mappingkeyvalueexample_featuressuper_kwargscallbackloss_fnmissingrX   s                               /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/sparse_encoder/trainer.pyrh   zSparseEncoderTrainer.__init__d   sM   2 %&@  <&JKKXYcXddfgh1ZHDD"@Aqrr=%",,,."#abb%M )DO&NN' ;hOWWY U%:%:%C%C!!..t/@/@A!<EOO]tAuI 5!NN#22BI)UfBgI$=$= >mpmr	M 9IJv&**JJSWSfSf j  &)'6):]L<Y%Z 	!L''?38L8L8Td7m,9&	&" _e^j^j^l$PZPSUZC155d5k6JKK$  $ !$\N2i#nK~=TUeTf gUU 	( mT*:m[3Y'6MlD)*\;2W&|4L "__T%**-9-EIZ\`g$."$-J
 -.-2II/8L+,(1L% I$5$:L:LPT:T78J8J7K LG G  	($8H<H' $D 022%>"  $4%%'?@8<8S8S8UD5DDYDYDcDcd
8]3deJJ!!/3JK<KKx
 7eD)-,0	D dD!fjfpfpfrsMb\[bt'8'8%'HHsDI),gv->P\@])^ %g?!'40$F|nTvw  "',,.1C		4DDD7D$TUaTb c&&,Wo%6fCLTUDUS[]<^^cdpcq  rNO  ))$6DI  I?P)Q+I6I")!%!8!8t||DDWDWfm "9 "D ( $ 7 7dll4CVCVek !8 !D 	$$%67E K"$F e" ts   <X/X#X)X.c                    t        |      }| j                  |       |j                  | j                  | j                  | j
                  | j                  |        y)a  
        Add a callback responsible for automatically tracking data required for the automatic model card generation

        This method is called in the ``__init__`` method of the
        :class:`~sentence_transformers.sparse_encoder.trainer.SparseEncoderTrainer` class.

        Args:
            default_args_dict (Dict[str, Any]): A dictionary of the default training arguments, so we can determine
                which arguments have been changed for the model card.
        )r2   trainerN)r   add_callbackon_init_endr3   statecontrolr2   )rx   ry   model_card_callbacks      r   rw   z,SparseEncoderTrainer.add_model_card_callback8  sL     ==NO-.''		4::t||SWS]S]gk'l    c                $    t         |   |      S )N)trial)rg   rM   )rx   r   rX   s     r   rM   z$SparseEncoderTrainer.call_model_initH  s    w&U&33r   c           	         ddl m} |j                         D ]`  \  }}|dk(  rt        ||      r||_        t        |t
        j                  j                        sDt        ||| j                  ||             b |S )Nr   r   r2   )
sentence_transformersr   named_childrenrK   r2   torchr   Modulesetattroverride_model_in_loss)rx   r@   r2   r   namechilds         r   r   z+SparseEncoderTrainer.override_model_in_lossK  sl    7..0 	OKD%w:e]#C"
E588??3dD$?$?u$MN		O
 r   c                Z   t        |t        j                  j                        r|j	                  |j
                        }n! ||      j	                  |j
                        }|t        |t              nd}d }t        | j                  j                        D ]  \  }}t        |t              s|} n |rv||dkD  ro|&| j                  j                  j                  |      }n!t        j                  d       t        |      }| j                  j                  j                  d|       |S )NFrC   zSpladeLoss detected without SpladeRegularizerWeightSchedulerCallback. Adding default SpladeRegularizerWeightSchedulerCallback to gradually increase weight values from 0 to their maximum.)r@   )rK   r   r   r   todevicer   	enumeraterm   r9   r   poprI   rN   insert)rx   r@   r2   is_splade_losssplade_scheduler_callback_indexidxr   splade_callbacks           r   rq   z!SparseEncoderTrainer.prepare_lossU  s	   
 dEHHOO,775<<(D;>>%,,/D9=9ID*5u*.'&t'<'<'F'FG 	MC($LM25/	 >FJilmJm.:"&"7"7"A"A"E"EFe"f K #KPT"U!!++221oFr   c                *    t         |   ||||      S )a  
        Computes the loss for the SparseEncoder model.

        It uses ``self.loss`` to compute the loss, which can be a single loss function or a dictionary of loss functions
        for different datasets. If the loss is a dictionary, the dataset name is expected to be passed in the inputs
        under the key "dataset_name". This is done automatically in the ``add_dataset_name_column`` method.
        Note that even if ``return_outputs = True``, the outputs will be empty, as the SparseEncoder losses do not
        return outputs.

        Args:
            model (SparseEncoder): The SparseEncoder model.
            inputs (Dict[str, Union[torch.Tensor, Any]]): The input data for the model.
            return_outputs (bool, optional): Whether to return the outputs along with the loss. Defaults to False.
            num_items_in_batch (int, optional): The number of items in the batch. Defaults to None. Unused, but required by the transformers Trainer.

        Returns:
            Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]: The computed loss. If `return_outputs` is True, returns a tuple of loss and outputs. Otherwise, returns only the loss.
        )r2   inputsreturn_outputsnum_items_in_batch)rg   compute_loss)rx   r2   r   r   r   rX   s        r   r   z!SparseEncoderTrainer.compute_lossw  s&    2 w#~Zl $ 
 	
r   c                &    t         |   ||      S )a9  
        We have to override the optimizer_grouped_parameters because the Trainer superclass bases it on the `model`
        itself, but the SparseEncoder losses can have weights that should be updated as well, e.g.
        SoftmaxLoss (see #2872).

        This method requires `transformers` >= 4.43.0.
        )r3   r2   )rg   get_optimizer_cls_and_kwargs)rx   r3   r2   rX   s      r   r   z1SparseEncoderTrainer.get_optimizer_cls_and_kwargs  s     w3U3KKr   )NNNNNNNNNNN)NNN)r2   SparseEncoder | Noner3   z%SparseEncoderTrainingArguments | Noner5   1Dataset | DatasetDict | dict[str, Dataset] | Noner6   r   r@   znn.Module | dict[str, nn.Module] | Callable[[SparseEncoder], torch.nn.Module] | dict[str, Callable[[SparseEncoder], torch.nn.Module]] | Noneru   z2SentenceEvaluator | list[SentenceEvaluator] | Noner4   z SparseEncoderDataCollator | Noner%   z)PreTrainedTokenizerBase | Callable | Noner7   z"Callable[[], SparseEncoder] | Noner8   z'Callable[[EvalPrediction], dict] | Noner9   zlist[TrainerCallback] | Noner:   z?tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]r;   z;Callable[[torch.Tensor, torch.Tensor], torch.Tensor] | NonereturnNone)ry   zdict[str, Any]r   r   )N)r   r   )r@   torch.nn.Moduler2   r   r   r   )r@   z<Callable[[SparseEncoder], torch.nn.Module] | torch.nn.Moduler2   r   r   r   )FN)r2   r   r   zdict[str, torch.Tensor | Any]r   r/   r   z2torch.Tensor | tuple[torch.Tensor, dict[str, Any]])r3   r   r2   r   r   ztuple[Any, Any])__name__
__module____qualname____doc__rh   rw   rM   r   rq   r   r   __classcell__)rX   s   @r   r    r       st   BL '+6:KOJN HL:>?C9=CG26W
 hl/R8#R8 4R8 I	R8
 HR8R8 FR8 8R8 =R8  7!R8" A#R8$ 0%R8& T'R8. )d/R80 
1R8hm 4 J    
	 L  %

 .
 	
 
<
< SW
L2
L;O
L	
L 
Lr   r    )4
__future__r   loggingrn   typingr   r   r   packaging.versionr   rd   r   transformersr   r	   r
   r   re   transformers.integrationsr    sentence_transformers.evaluationr   r   sentence_transformers.modelsr   ?sentence_transformers.sparse_encoder.callbacks.splade_callbacksr   2sentence_transformers.sparse_encoder.data_collatorr   +sentence_transformers.sparse_encoder.lossesr   r   /sentence_transformers.sparse_encoder.model_cardr   2sentence_transformers.sparse_encoder.SparseEncoderr   2sentence_transformers.sparse_encoder.training_argsr   sentence_transformers.trainerr   sentence_transformers.utilr   r   datasetsr   r   r   r   	getLoggerr   rI   r    rG   r   r   <module>r      ss    "  	    4  Q Q < 3 S / t X f Z L ] D SEE			8	$L5 Lr   