
    rh*                        d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 d dlm
Z
mZ ddZddZddZdd	Zedd
       Zedd       ZddZddZddZddZy)    )annotations)AnyoverloadN)
coo_matrix)Tensordevicec                   t        | t              rwt        d | D              rOt        j                  | D cg c]0  }|j                         j                  t        j                        2 c}      S t        j                  |       } n%t        | t              st        j                  |       } | j                  r | j                  t        j                        S | S c c}w )a  
    Converts the input `a` to a PyTorch tensor if it is not already a tensor.
    Handles lists of sparse tensors by stacking them.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input array or tensor.

    Returns:
        Tensor: The converted tensor.
    c              3  X   K   | ]"  }t        |t              xr |j                   $ y wN)
isinstancer   	is_sparse).0xs     t/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/util/tensor.py	<genexpr>z%_convert_to_tensor.<locals>.<genexpr>   s#     @z!V$44@s   (*dtype)r   listalltorchstackcoalescetofloat32tensorr   r   )ar   s     r   _convert_to_tensorr      s     !T@a@@;;aP

emm DPQQQA6"LLO{{tt%--t((H  Qs   5Cc                N    | j                         dk(  r| j                  d      } | S )z
    If the tensor `a` is 1-dimensional, it is unsqueezed to add a batch dimension.

    Args:
        a (Tensor): The input tensor.

    Returns:
        Tensor: The tensor with a batch dimension.
       r   )dim	unsqueezer   s    r   _convert_to_batchr#   $   s#     	uuw!|KKNH    c                d    t        |       } | j                         dk(  r| j                  d      } | S )a  
    Converts the input data to a tensor with a batch dimension.
    Handles lists of sparse tensors by stacking them.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input data to be converted.

    Returns:
        Tensor: The converted tensor with a batch dimension.
    r   r   )r   r    r!   r"   s    r   _convert_to_batch_tensorr&   3   s-     	1Auuw!|KKNHr$   c                H   | j                   s,t        j                  j                  j	                  | dd      S | j                         } | j                         | j                         }}t        j                  | j                  d      | j                        }|j                  d|d   |dz         t        j                  |      j                  d|d         }|dkD  }|j                         }||xx   ||   z  cc<   t        j                  ||| j                               S )z
    Normalizes the embeddings matrix, so that each sentence embedding has unit length.

    Args:
        embeddings (Tensor): The input embeddings matrix.

    Returns:
        Tensor: The normalized embeddings matrix.
       r   )pr    r   r   )r   r   nn
functional	normalizer   indicesvalueszerossizer   
index_add_sqrtindex_selectclonesparse_coo_tensor)
embeddingsr.   r/   	row_normsmasknormalized_valuess         r   normalize_embeddingsr;   D   s     xx"",,Z1!,DD$$&J ((*J,=,=,?VG JOOA.z7H7HIIGAJ	2

9%221gajAI q=Ddy.""7,=z?PQQr$   c                     y r    r7   truncate_dims     r   truncate_embeddingsr@   a   s    Y\r$   c                     y r   r=   r>   s     r   r@   r@   e   s    ]`r$   c                    | dd|f   S )a  
    Truncates the embeddings matrix.

    Args:
        embeddings (Union[np.ndarray, torch.Tensor]): Embeddings to truncate.
        truncate_dim (Optional[int]): The dimension to truncate sentence embeddings to. `None` does no truncation.

    Example:
        >>> from sentence_transformers import SentenceTransformer
        >>> from sentence_transformers.util import truncate_embeddings
        >>> model = SentenceTransformer("tomaarsen/mpnet-base-nli-matryoshka")
        >>> embeddings = model.encode(["It's so nice outside!", "Today is a beautiful day.", "He drove to work earlier"])
        >>> embeddings.shape
        (3, 768)
        >>> model.similarity(embeddings, embeddings)
        tensor([[1.0000, 0.8100, 0.1426],
                [0.8100, 1.0000, 0.2121],
                [0.1426, 0.2121, 1.0000]])
        >>> truncated_embeddings = truncate_embeddings(embeddings, 128)
        >>> truncated_embeddings.shape
        >>> model.similarity(truncated_embeddings, truncated_embeddings)
        tensor([[1.0000, 0.8092, 0.1987],
                [0.8092, 1.0000, 0.2716],
                [0.1987, 0.2716, 1.0000]])

    Returns:
        Union[np.ndarray, torch.Tensor]: Truncated embeddings.
    .Nr=   r>   s     r   r@   r@   i   s    : c=L=())r$   c                .   || S t        | t        j                        rt        j                  |       } | j
                  \  }}| j                  }t        j                  t        j                  |       t        ||      d      \  }}t        j                  | t        j                        }t        j                  ||      j                  d      j                  dt        ||            }d||j                         |j                         f<   d| | <   | S )a  
    Keeps only the top-k values (in absolute terms) for each embedding and creates a sparse tensor.

    Args:
        embeddings (Union[np.ndarray, torch.Tensor]): Embeddings to sparsify by keeping only top_k values.
        max_active_dims (int): Number of values to keep as non-zeros per embedding.

    Returns:
        torch.Tensor: A sparse tensor containing only the top-k values per embedding.
    r   )kr    r   r*   Tr   )r   npndarrayr   r   shaper   topkabsmin
zeros_likeboolaranger!   expandflatten)	r7   max_active_dims
batch_sizer    r   _top_indicesr9   batch_indicess	            r   select_max_active_dimsrV      s     *bjj)\\*-
 &&OJF ZZ		* 5_c9RXYZNA{ Jejj9DLLF;EEaHOOPRTWXgilTmnM;?D			 +"5"5"7	78 Jur$   c                j    | D ]-  }t        | |   t              s| |   j                  |      | |<   / | S )au  
    Send a PyTorch batch (i.e., a dictionary of string keys to Tensors) to a device (e.g. "cpu", "cuda", "mps").

    Args:
        batch (Dict[str, Tensor]): The batch to send to the device.
        target_device (torch.device): The target device (e.g. "cpu", "cuda", "mps").

    Returns:
        Dict[str, Tensor]: The batch with tensors sent to the target device.
    )r   r   r   )batchtarget_devicekeys      r   batch_to_devicer[      s>      6eCj&)s}5E#J6 Lr$   c                   | j                         } | j                         j                         j                         }| j	                         j                         j                         }t        ||d   |d   ff| j                        S )Nr   r   )rH   )r   r.   cpunumpyr/   r   rH   )r   r.   r/   s      r   to_scipy_coor_      sf    	

Aiikoo%%'GXXZ^^##%Fv
GAJ78HHr$   )r   zlist | np.ndarray | Tensorreturnr   )r   r   r`   r   )r7   r   r`   r   )r7   
np.ndarrayr?   
int | Noner`   ra   )r7   torch.Tensorr?   rb   r`   rc   )r7   np.ndarray | torch.Tensorr?   rb   r`   rd   )r7   rd   rQ   rb   r`   rc   )rX   dict[str, Any]rY   r   r`   re   )r   r   r`   r   )
__future__r   typingr   r   r^   rF   r   scipy.sparser   r   r   r   r#   r&   r;   r@   rV   r[   r_   r=   r$   r   <module>ri      sc    "     #  2"R: 
 \ 
 \ 
 ` 
 `*@D"Ir$   