
    rh]                        d dl mZmZ d dlZd dlmZ d dlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'  ejP                  e)      Z* G d de!      Z+ G d de      Z, G d de      Z- G d de#      Z. G d de"      Z/ G d de      Z0 G d de       Z1 G d  d!e      Z2 G d" d#ee.      Z3g d$Z4y)%    )CallableOptionalN)nn)check_model_inputs   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                        e Zd Z fdZ xZS )
MistralMLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     ~/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/mistral/modular_mistral.pyr'   zMistralMLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r'   __classcell__r0   s   @r1   r"   r"   $   s    Y Yr2   r"   c                   2    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	ee   d
eej                  e	ej                     e	eej                        f   fdZ xZS )MistralAttentionr/   	layer_idxc                 l   t         |           t        |dd       xs |j                  |j                  z  | _        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  |j                  | j
                  z  d      | _
        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  | j
                  z  |j                  d      | _        y )Nhead_dimFr$   )r&   r'   getattrr)   num_attention_headsr<   r   r(   q_projnum_key_value_headsk_projv_projo_projr.   r/   r:   r0   s      r1   r'   zMistralAttention.__init__-   s    
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr2   hidden_statesposition_embeddingsattention_maskpast_key_valuecache_positionkwargsreturnc           
      `   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|
||f| j                  sdn| j                  | j                   t#        | j                  dd       d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )	Nr   r   )sincosrI   eagerg        sliding_window)dropoutscalingrQ   )shaper<   r?   view	transposerA   rB   r   updater:   r   r/   _attn_implementationr   trainingattention_dropoutrS   r=   reshape
contiguousrC   )r.   rE   rF   rG   rH   rI   rJ   input_shapehidden_shapequery_states
key_statesvalue_statesrO   rN   cache_kwargsattention_interfaceattn_outputattn_weightss                     r1   forwardzMistralAttention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r2   )NN)r3   r4   r5   r    intr'   torchTensortupler   r   
LongTensorr   r   rf   r6   r7   s   @r1   r9   r9   ,   s    l} l l +/59*)||*) #5<<#=>*) !.	*)
 !*) !!1!12*) -.*) 
u||Xell3XeELL>Q5RR	S*)r2   r9   c                   (     e Zd Zdedef fdZ xZS )MistralDecoderLayerr/   r:   c                 j    t         |   ||       t        ||      | _        t	        |      | _        y )N)r/   r:   )r&   r'   r9   	self_attnr"   mlprD   s      r1   r'   zMistralDecoderLayer.__init__c   s,    +)9Mf%r2   )r3   r4   r5   r    rg   r'   r6   r7   s   @r1   rm   rm   b   s    &} & & &r2   rm   c                       e Zd ZeedZy)MistralPreTrainedModel)rE   
attentionsN)r3   r4   r5   rm   r9   _can_record_outputs r2   r1   rr   rr   i   s    ,&r2   rr   c                       e Zd Zee	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee	   deej                     dee   deej                     d	ee   d
efd              Zy)MistralModelN	input_idsrG   position_idspast_key_valuesinputs_embeds	use_cacherI   rJ   rK   c                 |   |d u |d uz  rt        d      || j                  |      }|r|
t               }|F||j                         nd}	t	        j
                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}| j                  ||      }| j                  d | j                  j                   D ]  } ||f||||||d|} | j!                  |      }t#        ||r|      S d       S )Nz:You must specify exactly one of input_ids or inputs_embedsr   r   )device)r/   input_embedsrG   rI   rz   ry   )rG   ry   rH   r|   rI   rF   )last_hidden_staterz   )
ValueErrorembed_tokensr	   get_seq_lengthrh   arangerT   r~   	unsqueezer/   rQ   r
   r   
rotary_emblayersnum_hidden_layersnormr   )r.   rx   rG   ry   rz   r{   r|   rI   rJ   past_seen_tokensmask_functioncausal_maskrE   rF   decoder_layers                  r1   rf   zMistralModel.forwardq   s~    -t";<YZZ  --i8M0*nO!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;&))+%
 &"oom\J![[)H4;;+H+HI 
	M)	*).#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r2   )NNNNNNN)r3   r4   r5   r   r   r   rh   rk   ri   r   FloatTensorboolr   r   r   rf   ru   r2   r1   rw   rw   p   s     151537+/59$(599
E,,-9
 !.9
 u//0	9

 "%9
   1 129
 D>9
 !!1!129
 +,9
 
!9
  9
r2   rw   c                       e Zd Zy)MistralForCausalLMNr3   r4   r5   ru   r2   r1   r   r          r2   r   c                       e Zd Zy)MistralForTokenClassificationNr   ru   r2   r1   r   r      r   r2   r   c                       e Zd Zy) MistralForSequenceClassificationNr   ru   r2   r1   r   r      r   r2   r   c                       e Zd Zy)MistralForQuestionAnsweringNr   ru   r2   r1   r   r      s    r2   r   )r   r   rw   rr   r   r   )5typingr   r   rh   r   transformers.utils.genericr   cache_utilsr   r	   masking_utilsr
   r   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr    
get_loggerr3   loggerr"   r9   rm   rr   rw   r   r   r   r   __all__ru   r2   r1   <module>r      s    %   9 . R B 8 5 & @ @   1 
		H	%Y Y3)~ 3)l&+ &1 <
: <
~	) 		$? 		'E 	 \"=?U [r2   