
from typing import Callable, Optional

import torch
import torch.utils.checkpoint
from torch import nn

from ...cache_utils import Cache, DynamicCache
from ...masking_utils import create_causal_mask, create_sliding_window_causal_mask
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, auto_docstring, logging
from ...utils.generic import check_model_inputs
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaDecoderLayer,
    LlamaForCausalLM,
    LlamaForQuestionAnswering,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaPreTrainedModel,
    apply_rotary_pos_emb,
    eager_attention_forward,
)
from ..mistral.modeling_mistral import MistralModel
from .configuration_qwen2 import Qwen2Config


logger = logging.get_logger(__name__)


class Qwen2MLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        # Same gated MLP as Llama, but Qwen2 never uses a bias on the projections
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class Qwen2Attention(LlamaAttention):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__(config, layer_idx)
        # Unlike Llama, Qwen2 uses a bias on the q/k/v projections (but not on the output projection)
        self.q_proj = nn.Linear(config.hidden_size, config.num_attention_heads * self.head_dim, bias=True)
        self.k_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.v_proj = nn.Linear(config.hidden_size, config.num_key_value_heads * self.head_dim, bias=True)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)
        self.sliding_window = config.sliding_window if config.layer_types[layer_idx] == "sliding_attention" else None

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        key_states = self.k_proj(hidden_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        if past_key_value is not None:
            # sin and cos are specific to RoPE models; cache_position is needed for the static cache
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=self.sliding_window,  # main diff with Llama
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class Qwen2DecoderLayer(LlamaDecoderLayer):
    def __init__(self, config: Qwen2Config, layer_idx: int):
        super().__init__()
        self.attention_type = config.layer_types[layer_idx]


class Qwen2PreTrainedModel(LlamaPreTrainedModel):
    pass


class Qwen2Model(MistralModel):
    def __init__(self, config: Qwen2Config):
        super().__init__(config)
        self.has_sliding_layers = "sliding_attention" in self.config.layer_types

    @check_model_inputs
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> BaseModelOutputWithPast:
        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache()

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        # The mask mapping may already have been prepared, e.g. by `generate`
        if not isinstance(causal_mask_mapping := attention_mask, dict):
            mask_kwargs = {
                "config": self.config,
                "input_embeds": inputs_embeds,
                "attention_mask": attention_mask,
                "cache_position": cache_position,
                "past_key_values": past_key_values,
                "position_ids": position_ids,
            }
            causal_mask_mapping = {
                "full_attention": create_causal_mask(**mask_kwargs),
            }
            # Sliding-window layers are only present for some configurations
            if self.has_sliding_layers:
                causal_mask_mapping["sliding_attention"] = create_sliding_window_causal_mask(**mask_kwargs)

        hidden_states = inputs_embeds

        # Position embeddings are computed once and shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            hidden_states = decoder_layer(
                hidden_states,
                attention_mask=causal_mask_mapping[decoder_layer.attention_type],
                position_ids=position_ids,
                past_key_value=past_key_values,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
                **kwargs,
            )

        hidden_states = self.norm(hidden_states)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
        )


class Qwen2ForCausalLM(LlamaForCausalLM):
    pass


class Qwen2ForSequenceClassification(LlamaForSequenceClassification):
    pass


class Qwen2ForTokenClassification(LlamaForTokenClassification):
    pass


class Qwen2ForQuestionAnswering(LlamaForQuestionAnswering):
    pass


__all__ = [
    "Qwen2PreTrainedModel",
    "Qwen2Model",
    "Qwen2ForCausalLM",
    "Qwen2ForSequenceClassification",
    "Qwen2ForTokenClassification",
    "Qwen2ForQuestionAnswering",
]
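

# ---------------------------------------------------------------------------------------
# Illustrative usage sketch (not part of the upstream transformers source). Because this
# modular file uses relative imports it is never executed directly; the snippet below is
# meant as a copy-paste example and stays inert behind the __main__ guard. The tiny
# hyperparameters are assumptions chosen to keep the example cheap to run, not real Qwen2
# checkpoint settings.
if __name__ == "__main__":
    from transformers import Qwen2Config, Qwen2Model

    tiny_config = Qwen2Config(
        vocab_size=1024,  # assumed toy vocabulary size
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=2,  # grouped-query attention: 2 KV heads shared by 4 query heads
        max_position_embeddings=128,
    )
    model = Qwen2Model(tiny_config).eval()  # randomly initialized, no weights downloaded

    input_ids = torch.randint(0, tiny_config.vocab_size, (1, 8))
    with torch.no_grad():
        outputs = model(input_ids=input_ids)

    # One hidden state per input token: (batch_size, sequence_length, hidden_size)
    print(outputs.last_hidden_state.shape)  # torch.Size([1, 8, 64])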