
import math
import os

import torch
import torch.nn.functional as F

from ..utils.import_utils import is_torch_npu_available


if is_torch_npu_available():
    import torch_npu
    from einops import rearrange, repeat
    from torch_npu import npu_rotary_mul


# FlashAttention2 on Ascend NPU supports two causal-mask alignments, selected through the
# `NPU_FA2_SPARSE_MODE` environment variable (down-right aligned is the default).
TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE = 2
DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODE = 3

SPARSE_MODE = int(os.getenv("NPU_FA2_SPARSE_MODE", default=DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODE))
if SPARSE_MODE not in [TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE, DOWN_RIGHT_ALIGNED_CAUSAL_MASK_MODE]:
    raise ValueError(
        "Environment variable `NPU_FA2_SPARSE_MODE` can only be set as 2 (top-left aligned causal mask) "
        "or 3 (down-right aligned causal mask)."
    )

# Cache one boolean upper-triangular mask per device so it is only allocated once.
ATTN_MASK_NPU_CACHE = {}


def get_attn_mask_npu(device):
    """Get or create attention mask for the specified device."""
    if device not in ATTN_MASK_NPU_CACHE:
        ATTN_MASK_NPU_CACHE[device] = torch.triu(torch.ones([2048, 2048], device=device), diagonal=1).bool()
    return ATTN_MASK_NPU_CACHE[device]


def is_npu_fa2_top_left_aligned_causal_mask():
    return SPARSE_MODE == TOP_LEFT_ALIGNED_CAUSAL_MASK_MODE if is_torch_npu_available() else False


class IndexFirstAxis(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, indices):
        ctx.save_for_backward(indices)
        assert input.ndim >= 2
        ctx.first_axis_dim, other_shape = input.shape[0], input.shape[1:]
        second_dim = other_shape.numel()
        # Gather over the flattened trailing dims, then restore the original trailing shape.
        return torch.gather(
            rearrange(input, "b ... -> b (...)"), 0, repeat(indices, "z -> z d", d=second_dim)
        ).reshape(-1, *other_shape)

    @staticmethod
    def backward(ctx, grad_output):
        (indices,) = ctx.saved_tensors
        assert grad_output.ndim >= 2
        other_shape = grad_output.shape[1:]
        grad_output = rearrange(grad_output, "b ... -> b (...)")
        grad_input = torch.zeros(
            [ctx.first_axis_dim, grad_output.shape[1]], device=grad_output.device, dtype=grad_output.dtype
        )
        # Scatter the incoming gradients back to the rows that were gathered in forward.
        grad_input.scatter_(0, repeat(indices, "z -> z d", d=grad_output.shape[1]), grad_output)
        return grad_input.reshape(ctx.first_axis_dim, *other_shape), None


index_first_axis = IndexFirstAxis.apply


class IndexPutFirstAxis(torch.autograd.Function):
    @staticmethod
    def forward(ctx, values, indices, first_axis_dim):
        ctx.save_for_backward(indices)
        assert indices.ndim == 1
        assert values.ndim >= 2
        output = torch.zeros(first_axis_dim, *values.shape[1:], device=values.device, dtype=values.dtype)
        output[indices] = values
        return output

    @staticmethod
    def backward(ctx, grad_output):
        (indices,) = ctx.saved_tensors
        grad_values = grad_output[indices]
        return grad_values, None, None


index_put_first_axis = IndexPutFirstAxis.apply


def pad_input(hidden_states, indices, batch, seqlen):
    """
    Arguments:
        hidden_states: (total_nnz, ...), where total_nnz = number of tokens selected in attention_mask.
        indices: (total_nnz), the indices that represent the non-masked tokens of the original padded input sequence.
        batch: int, batch size for the padded sequence.
        seqlen: int, maximum sequence length for the padded sequence.
    Return:
        hidden_states: (batch, seqlen, ...)
    """
    output = index_put_first_axis(hidden_states, indices, batch * seqlen)
    return rearrange(output, "(b s) ... -> b s ...", b=batch)


def unpad_input(hidden_states, attention_mask, unused_mask=None):
    """
    Arguments:
        hidden_states: (batch, seqlen, ...)
        attention_mask: (batch, seqlen), bool / int, 1 means valid and 0 means not valid.
        unused_mask: (batch, seqlen), bool / int, 1 means the element is allocated but unused.
    Return:
        hidden_states: (total_nnz, ...), where total_nnz = number of tokens selected in attention_mask + unused_mask.
        indices: (total_nnz), the indices of the non-masked tokens from the flattened input sequence.
        cu_seqlens: (batch + 1), the cumulative sequence lengths, used to index into hidden_states.
        max_seqlen_in_batch: int
        seqused: (batch), returns the number of tokens selected in attention_mask + unused_mask.
    """
    all_masks = (attention_mask + unused_mask) if unused_mask is not None else attention_mask
    seqlens_in_batch = all_masks.sum(dim=-1, dtype=torch.int32)
    used_seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
    indices = torch.nonzero(all_masks.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = seqlens_in_batch.max().item()
    # Prepend a zero so that cu_seqlens[i]:cu_seqlens[i + 1] indexes the i-th sequence.
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))

    return (
        index_first_axis(rearrange(hidden_states, "b s ... -> (b s) ..."), indices),
        indices,
        cu_seqlens,
        max_seqlen_in_batch,
        used_seqlens_in_batch,
    )
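

# Illustrative round trip (a sketch, not part of the module's API): it assumes the guarded
# imports above have run, i.e. `torch_npu` and `einops` are installed, since
# `index_first_axis` relies on einops. Shapes follow the docstrings of the two helpers.
#
#     hidden = torch.randn(2, 4, 8)                       # (batch, seqlen, dim)
#     mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])   # 3 and 2 valid tokens
#     unpadded, indices, cu_seqlens, max_len, seqused = unpad_input(hidden, mask)
#     # unpadded: (5, 8), cu_seqlens: tensor([0, 3, 5]), max_len: 3, seqused: tensor([3, 2])
#     repadded = pad_input(unpadded, indices, batch=2, seqlen=4)
#     # repadded matches `hidden` wherever mask == 1 and is zero elsewhere
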
  }|%dt        j                  | j                  d         z  }|s0| j                  d   }t        j                  | |||d||      d   }	|	S t        | j                        }
| j                  d   }t        j                  | |||d|||
t        	      d   }	|	S )N      ?r$   r   BSND)	keep_probscaler   )rg   rh   
atten_masksparse_mode)mathsqrtr'   	torch_npunpu_fusion_attentionr   r   r   )qkv	dropout_psoftmax_scalecausalkwargsrg   head_numrD   attn_mask_npus              r   npu_flash_attn_funcrx      s     iIdii44771://1a6U^fstuvw  M *!((3771://$#

 
 Mr   c
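

# Hypothetical call sketch (assumes an Ascend NPU with `torch_npu` installed); q, k, v use
# the "BSND" layout, i.e. (batch, seqlen, num_heads, head_dim):
#
#     q = torch.randn(2, 1024, 8, 128, dtype=torch.float16, device="npu")
#     k, v = torch.randn_like(q), torch.randn_like(q)
#     out = npu_flash_attn_func(q, k, v, causal=True)  # (2, 1024, 8, 128)
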
                    d|z
  }|%dt        j                  | j                  d         z  }|	s| j                  d   }t        j                  | |||d d ||dt        |dd  j                         j                         j                               t        |dd  j                         j                         j                                     d   }|S t        | j                        }| j                  d   }t        j                  | |||d d |||dt        |dd  j                         j                         j                               t        |dd  j                         j                         j                               t              d   }|S )Nre   r$   r   TND)pseri   rh   rg   input_layoutactual_seq_qlenactual_seq_kvlenr   )	r{   padding_maskri   rh   rg   r|   r}   r~   rj   )rk   rl   r'   rm   rn   tuplecpunumpytolistr   r   r   )ro   rp   rq   cu_seqlens_qcu_seqlens_kmax_seqlen_qmax_seqlen_krr   rs   rt   ru   rg   rv   rD   rw   s                  r   npu_flash_attn_varlen_funcr      s    iIdii44771://!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK
 @ M% *!((3771://$!,qr"2"6"6"8">">"@"G"G"IJ"<#3#7#7#9#?#?#A#H#H#JK#
   Mr   c                    t        |j                        dk(  rT|j                  d   | j                  d   dz  k(  r2|j                  dd      }|j                  d      j                  d      }t        |j                        dk(  rT|j                  d   | j                  d   dz  k(  r2|j                  dd      }|j                  d      j                  d      }t	        | ||      S )Nr   r$   r   r   )lenr'   r   	unsqueezer   )xcossinru   s       r   npu_apply_rotary_embr      s    
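

# Hypothetical variable-length sketch (assumes an Ascend NPU): two sequences of lengths 3
# and 5 are packed into one (total_tokens, num_heads, head_dim) tensor in the "TND" layout
# and described by cumulative lengths of the same form as those built by `unpad_input`:
#
#     cu_seqlens = torch.tensor([0, 3, 8], dtype=torch.int32, device="npu")
#     q = torch.randn(8, 8, 128, dtype=torch.float16, device="npu")
#     k, v = torch.randn_like(q), torch.randn_like(q)
#     out = npu_flash_attn_varlen_func(
#         q, k, v, cu_seqlens_q=cu_seqlens, cu_seqlens_k=cu_seqlens, causal=True
#     )  # (8, 8, 128)
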
def npu_apply_rotary_emb(x, cos, sin, **kwargs):
    # cos/sin tables produced by chunking are half the head dimension wide; on Ascend NPU
    # they must be repeated back to full width and broadcast over batch and heads before
    # `npu_rotary_mul` is called.
    if len(cos.shape) == 2 and cos.shape[-1] == x.shape[-1] // 2:
        cos = repeat(cos, "... d -> ... (2 d)")
        cos = cos.unsqueeze(0).unsqueeze(2)

    if len(sin.shape) == 2 and sin.shape[-1] == x.shape[-1] // 2:
        sin = repeat(sin, "... d -> ... (2 d)")
        sin = sin.unsqueeze(0).unsqueeze(2)

    return npu_rotary_mul(x, cos, sin)