
    rh                    j    d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ  G d d	e      Zy)
    )annotations)abstractmethod)AnyN)	Tokenizer)PreTrainedTokenizerBase)Modulec                  F    e Zd ZU dZdZded<   ded<   	 ed
d       ZddZy	)InputModuleaA  
    Subclass of :class:`sentence_transformers.models.Module`, base class for all input modules in the Sentence
    Transformers library, i.e. modules that are used to process inputs and optionally also perform processing
    in the forward pass.

    This class provides a common interface for all input modules, including methods for loading and saving the module's
    configuration and weights, as well as input processing. It also provides a method for performing the forward pass
    of the module.

    Three abstract methods are defined in this class, which must be implemented by subclasses:

    - :meth:`sentence_transformers.models.Module.forward`: The forward pass of the module.
    - :meth:`sentence_transformers.models.Module.save`: Save the module to disk.
    - :meth:`sentence_transformers.models.InputModule.tokenize`: Tokenize the input texts and return a dictionary of tokenized features.

    Optionally, you may also have to override:

    - :meth:`sentence_transformers.models.Module.load`: Load the module from disk.

    To assist with loading and saving the module, several utility methods are provided:

    - :meth:`sentence_transformers.models.Module.load_config`: Load the module's configuration from a JSON file.
    - :meth:`sentence_transformers.models.Module.load_file_path`: Load a file from the module's directory, regardless of whether the module is saved locally or on Hugging Face.
    - :meth:`sentence_transformers.models.Module.load_dir_path`: Load a directory from the module's directory, regardless of whether the module is saved locally or on Hugging Face.
    - :meth:`sentence_transformers.models.Module.load_torch_weights`: Load the PyTorch weights of the module, regardless of whether the module is saved locally or on Hugging Face.
    - :meth:`sentence_transformers.models.Module.save_config`: Save the module's configuration to a JSON file.
    - :meth:`sentence_transformers.models.Module.save_torch_weights`: Save the PyTorch weights of the module.
    - :meth:`sentence_transformers.models.InputModule.save_tokenizer`: Save the tokenizer used by the module.
    - :meth:`sentence_transformers.models.Module.get_config_dict`: Get the module's configuration as a dictionary.

    And several class variables are defined to assist with loading and saving the module:

    - :attr:`sentence_transformers.models.Module.config_file_name`: The name of the configuration file used to save the module's configuration.
    - :attr:`sentence_transformers.models.Module.config_keys`: A list of keys used to save the module's configuration.
    - :attr:`sentence_transformers.models.InputModule.save_in_root`: Whether to save the module's configuration in the root directory of the model or in a subdirectory named after the module.
    - :attr:`sentence_transformers.models.InputModule.tokenizer`: The tokenizer used by the module.
    Tboolsave_in_rootz#PreTrainedTokenizerBase | Tokenizer	tokenizerc                     y)a  
        Tokenizes the input texts and returns a dictionary of tokenized features.

        Args:
            texts (list[str]): List of input texts to tokenize.
            **kwargs: Additional keyword arguments for tokenization, e.g. ``task``.

        Returns:
            dict[str, torch.Tensor | Any]: Dictionary containing tokenized features, e.g.
                ``{"input_ids": ..., "attention_mask": ...}``
        N )selftextskwargss      {/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/models/InputModule.pytokenizezInputModule.tokenize<   s        c                    t        | d      syt        | j                  t              r | j                  j                  |fi | yt        | j                  t
              r | j                  j                  |fi | y)z
        Saves the tokenizer to the specified output path.

        Args:
            output_path (str): Path to save the tokenizer.
            **kwargs: Additional keyword arguments for saving the tokenizer.

        Returns:
            None
        r   N)hasattr
isinstancer   r   save_pretrainedr   save)r   output_pathr   s      r   save_tokenizerzInputModule.save_tokenizerJ   sj     t[)dnn&=>*DNN**;A&A 	 	2DNN6v6r   N)r   z	list[str]returnzdict[str, torch.Tensor | Any])r   strr   None)	__name__
__module____qualname____doc__r   __annotations__r   r   r   r   r   r   r
   r
      s8    $L L$22  r   r
   )
__future__r   abcr   typingr   torch
tokenizersr   $transformers.tokenization_utils_baser   #sentence_transformers.models.Moduler   r
   r   r   r   <module>r,      s(    "      H 6O& Or   