
    rh,                        d dl mZmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZmZmZmZmZmZm Z  ddl!m"Z"  ejF                  e$      Z%dZ&dZ' G d de      Z( G d de      Z) G d de      Z* G d de      Z+ G d de      Z, G d de      Z- G d de      Z. G d  d!e      Z/g d"Z0y)#    )CallableOptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging   )CLIPMLP)LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   (    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	eej                  e	ej                     e	eej                        f   fd
Z xZS )PhiAttentionconfig	layer_idxc                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        | `t        | j                  |j                  z        | _        |j                   | _        | j                   r}t        j"                  |j                  |j
                  z  |j$                  d      | _        t        j"                  |j                  |j
                  z  |j$                  d      | _        y y )NTbias)epselementwise_affine)super__init__nnLinearhidden_sizenum_attention_headshead_dimq_projnum_key_value_headsk_projv_projdenseo_projintpartial_rotary_factorrotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr   r   	__class__s      v/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/phi/modular_phi.pyr%   zPhiAttention.__init__$   sb   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0L0L LM"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	     hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }| j                  r"| j                  |	      }	| j                  |
      }
|\  }}|	dd | j                  f   |	d| j                  d f   }}|
dd | j                  f   |
d| j                  d f   }}t        ||||      \  }}t        j                  ||fd      }	t        j                  ||fd      }
|'|||d}|j                  |
|| j                  |      \  }
}t         }| j"                  j$                  dk7  rt&        | j"                  j$                     } || |	|
||f| j(                  sdn| j*                  | j,                  d	|\  }} |j.                  g |d j1                         }| j3                  |      }||fS )
Nr   r   .)dim)sincosrB   eagerg        )dropoutscaling)shaper*   r+   view	transposer-   r.   r4   r7   r8   r3   r   torchcatupdater   r   r   _attn_implementationr   trainingattention_dropoutrK   reshape
contiguousr/   )r:   r>   r?   r@   rA   rB   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statesrH   rG   	query_rot
query_passkey_rotkey_passcache_kwargsattention_interfaceattn_outputattn_weightss                         r<   forwardzPhiAttention.forward5   so    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J&S 1 1 1112d//112 	
 s/d////0sD--//0 
 2)Wc3O	7 yy)Z!8bAYY2;
%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHjj-L((r=   )NN)__name__
__module____qualname__r   r1   r%   rO   Tensortupler   r   
LongTensorre   __classcell__r;   s   @r<   r   r   #   s    y S , +/59;)||;) #5<<#=>;) !.	;)
 !;) !!1!12;) 
u||Xell3XeELL>Q5RR	S;)r=   r   c                       e Zd Zy)PhiMLPNrf   rg   rh    r=   r<   ro   ro   s       r=   ro   c                       e Zd Zdedef fdZ	 	 	 	 	 	 	 ddej                  deej                     deej                     dee
ej                        dee   d	ee   d
eej                     dee
ej                  ej                  f      de
ej                  ee
ej                  ej                  f      f   fdZ xZS )PhiDecoderLayerr   r   c                    t         |           t        ||      | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                        | _        y )N)r   r"   )r$   r%   r   	self_attnro   mlpr&   r5   r(   r6   input_layernormDropoutresid_pdropresid_dropoutr9   s      r<   r%   zPhiDecoderLayer.__init__x   s]    %f	B&>!||F,>,>FDYDYZZZ(:(:;r=   r>   r@   position_idsrA   output_attentions	use_cacherB   r?   rC   c	                     |}
| j                  |      } | j                  d||||||||d|	\  }}| j                  |      }| j                  | j                  |            }||z   |
z   }|f}|r||fz  }|S )N)r>   r@   r}   rA   r~   r   rB   r?   rq   )ry   rw   r|   rx   )r:   r>   r@   r}   rA   r~   r   rB   r?   rW   residualattn_outputsself_attn_weightsfeed_forward_hidden_statesoutputss                  r<   re   zPhiDecoderLayer.forward   s     !,,]; +9$.. 
+
')%)/) 3
+
 
+
'' )),7%)%7%78O%P"$'AAHL ")++Gr=   )NNNFFNN)rf   rg   rh   r   r1   r%   rO   ri   r   rk   rj   boolFloatTensorre   rl   rm   s   @r<   rt   rt   w   s   <y <S < 26378<,1$)59KO%||% !.% u//0	%
 !u||!45% $D>% D>% !!1!12% &eELL%,,,F&GH% 
u  (51B1BEDUDU1U+V"WW	X%r=   rt   c                       e Zd Zy)PhiRotaryEmbeddingNrp   rq   r=   r<   r   r      rr   r=   r   c                       e Zd Zdef fdZ	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee	   deej                     dee   d	ee   d
ee   deej                     dee   defdZ xZS )PhiModelr   c           	      d   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        j                  |j                        | _
        t        j                  |j                  |j                        | _        | `y c c}w )Nrv   )r$   r%   r&   
ModuleListrangenum_hidden_layersrt   layersrz   
embd_pdropembed_dropoutr5   r(   r6   final_layernormnormr9   s      r<   r%   zPhiModel.__init__   s     mmAFvG_G_A`aI_VY/a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B-	input_idsr@   r}   past_key_valuesinputs_embedsr   r~   output_hidden_statesrB   rW   rC   c
                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|d u |d uz  rt	        d      | j
                  r%| j                  r|rt        j                  d       d}|| j                  |      }|r|
t               }|	F||j                         nd}t        j                  |||j                  d   z   |j                        }	||	j!                  d      }t#        | j                   |||	||      }| j%                  |      }|}| j'                  ||      }|rdnd }|rdnd }| j(                  d | j                   j*                   D ],  }|r||fz  } ||f||||||	|d	|
}|d   }|s$||d   fz  }. | j-                  |      }|r||fz  }t/        ||r|nd ||
      S )Nz:You must specify exactly one of input_ids or inputs_embedszX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.Fr   r   )device)r   input_embedsr@   rB   r   r}   rq   )r@   r}   rA   r~   r   rB   r?   )last_hidden_stater   r>   
attentions)r   r~   r   r   
ValueErrorgradient_checkpointingrS   loggerwarning_onceembed_tokensr   get_seq_lengthrO   arangerL   r   	unsqueezer   r   
rotary_embr   r   r   r
   )r:   r   r@   r}   r   r   r   r~   r   rB   rW   past_seen_tokenscausal_maskr>   r?   all_hidden_statesall_self_attnsdecoder_layerlayer_outputss                      r<   re   zPhiModel.forward   sI    2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	-t";<YZZ&&4==Yj I  --i8M0*nO!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L(;;&))+%
 **=9% #oom\J #7BD0d![[)H4;;+H+HI 	6M#!m%55!)
*)."3#-$7
 
M *!,M =#3"55'	6* ,,];  -!11&+/8Od+%	
 	
r=   )	NNNNNNNNN)rf   rg   rh   r   r%   r   rO   rk   ri   r   r   r   r   r   r
   re   rl   rm   s   @r<   r   r      s    y  151537+/59$(,0/359^
E,,-^
 !.^
 u//0	^

 "%^
   1 12^
 D>^
 $D>^
 'tn^
 !!1!12^
 +,^
 
!^
r=   r   c                        e Zd Z fdZ xZS )PhiForCausalLMc                     t         |   |       t        j                  |j                  |j
                  d      | _        y )NTr    )r$   r%   r&   r'   r(   
vocab_sizelm_head)r:   r   r;   s     r<   r%   zPhiForCausalLM.__init__  s0     yy!3!3V5F5FTRr=   )rf   rg   rh   r%   rl   rm   s   @r<   r   r     s    S Sr=   r   c                       e Zd Zy)PhiForSequenceClassificationNrp   rq   r=   r<   r   r     rr   r=   r   c                       e Zd Zy)PhiForTokenClassificationNrp   rq   r=   r<   r   r      rr   r=   r   )PhiPreTrainedModelr   r   r   r   )1typingr   r   rO   torch.nnr&   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_phir   
get_loggerrf   r   _CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr   ro   rt   r   r   r   r   r   __all__rq   r=   r<   <module>r      s    %   . / 9 6 & 0 (	 	 	 ) 
		H	%' M)> M)`	W 	-0 -`	- 	h
z h
VS% S	#A 		 ; 	r=   