
from typing import Optional

import torch

from ..utils import is_torch_xpu_available, logging
from ..utils.import_utils import is_torch_greater_or_equal


logger = logging.get_logger(__name__)

_is_torch_greater_or_equal_than_2_5 = is_torch_greater_or_equal("2.5", accept_dev=True)
_is_torch_greater_or_equal_than_2_8 = is_torch_greater_or_equal("2.8", accept_dev=True)
_is_torch_xpu_available = is_torch_xpu_available()


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)


def use_gqa_in_sdpa(attention_mask: Optional[torch.Tensor], key: torch.Tensor) -> bool:
    # Only take SDPA's native grouped-query attention path when it is safe to do so:
    # on XPU it requires torch >= 2.8; otherwise it requires torch >= 2.5 and no attention mask.
    # In both cases `key` must not be a `torch.fx.Proxy`, since symbolic tracing does not support it.
    if _is_torch_xpu_available:
        return _is_torch_greater_or_equal_than_2_8 and not isinstance(key, torch.fx.Proxy)
    return (
        _is_torch_greater_or_equal_than_2_5
        and attention_mask is None
        and not isinstance(key, torch.fx.Proxy)
    )


def sdpa_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    is_causal: Optional[bool] = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`sdpa` attention does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )

    sdpa_kwargs = {}
    if hasattr(module, "num_key_value_groups"):
        if not use_gqa_in_sdpa(attention_mask, key):
            # Expand the key/value heads so they match the number of query heads.
            key = repeat_kv(key, module.num_key_value_groups)
            value = repeat_kv(value, module.num_key_value_groups)
        else:
            # Let SDPA handle grouped-query attention natively.
            sdpa_kwargs = {"enable_gqa": True}

    if attention_mask is not None and attention_mask.ndim == 4:
        # Crop the mask to the key/value sequence length.
        attention_mask = attention_mask[:, :, :, : key.shape[-2]]

    # SDPA's memory-efficient backend can misbehave with non-contiguous inputs and a custom
    # attention mask on some torch versions, so make the inputs contiguous.
    query = query.contiguous()
    key = key.contiguous()
    value = value.contiguous()

    if is_causal is None:
        # Fall back to causal masking only when no explicit mask is given, the query spans more
        # than one position, and the module itself is causal.
        is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)

    # During jit tracing, shapes are tensors, so `is_causal` may be a tensor; SDPA expects a bool.
    if torch.jit.is_tracing() and isinstance(is_causal, torch.Tensor):
        is_causal = is_causal.item()

    attn_output = torch.nn.functional.scaled_dot_product_attention(
        query,
        key,
        value,
        attn_mask=attention_mask,
        dropout_p=dropout,
        scale=scaling,
        is_causal=is_causal,
        **sdpa_kwargs,
    )
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, None