
    rhQ                     X   d dl mZmZmZmZ d dlZd dlmc mZ	 d dlmZ ddl
mZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% ddl&m'Z'  e       r	d dl(m)Z)m*Z* nd\  Z)Z*e)e*fZ+ e,e+      Z- ej\                  e/      Z0 G d de"      Z1 G d de#      Z2 G d dejf                        Z4 G d de      Z5 G d de      Z6 G d dejf                        Z7 G d  d!e      Z8 G d" d#e!      Z9 G d$ d%e       Z: G d& d'e      Z;g d(Z<y))    )AnyCallableOptionalUnionN)nn   )DynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)is_causal_conv1d_available   )apply_mask_to_padding_states)LlamaAttentionLlamaForCausalLM
LlamaModelLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )
Lfm2Config)causal_conv1d_fncausal_conv1d_updateNNc                       e Zd Zy)Lfm2RMSNormN__name__
__module____qualname__     x/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/lfm2/modular_lfm2.pyr"   r"   7       r(   r"   c                       e Zd Zy)Lfm2RotaryEmbeddingNr#   r'   r(   r)   r,   r,   ;   r*   r(   r,   c                   *     e Zd Zdef fdZd Z xZS )Lfm2MLPconfigc                    t         |           |j                  }|j                  rat	        d|z  dz        }|j
                  Dt	        |j
                  |z        }|j                  ||j                  z   dz
  |j                  z  z  }t        j                  |j                  |d      | _
        t        j                  |j                  |d      | _        t        j                  ||j                  d      | _        y )Nr   r   r   Fbias)super__init__intermediate_sizeblock_auto_adjust_ff_dimintblock_ffn_dim_multiplierblock_multiple_ofr   Linearhidden_sizew1w3w2)selfr/   r5   	__class__s      r)   r4   zLfm2MLP.__init__@   s    "44** #A(9$9A$= >..:$'(G(GJ[([$\!$*$<$<&)A)AAAE&JbJbb%! ))F..0AN))F..0AN))-v/A/ANr(   c                     | j                  t        j                  | j                  |            | j	                  |      z        S N)r>   Fsilur<   r=   )r?   xs     r)   forwardzLfm2MLP.forwardO   s/    wwqvvdggaj)DGGAJ677r(   )r$   r%   r&   r   r4   rF   __classcell__r@   s   @r)   r.   r.   ?   s    Oz O8r(   r.   c                   Z   e Zd ZdZdZdZdZdZej                  dfde
dedej                  deej                  edf   fdZ	 dd	ej"                  d
ej"                  dedeeeef      deej"                  ej"                  f   f
dZdej.                  fdZddee   defdZdej"                  dedeeef   fdZdefdZdedeej"                  ej"                  f   fdZdeeej"                     eej"                     f   fdZeddeeeej>                           ddfd       Z d Z!y)Lfm2HybridConvCachea  
    Attention and conv cache for Lfm2.

    It stores the Key and Value states as a list of tensors, one for each layer.
    Attention layer cache shape: `[batch_size, num_heads, seq_len, head_dim]`.
    Conv layer cache shape: `[batch_size, hidden_size, L_cache-1]`.
    NFr/   max_batch_sizedtypedevicec                 ,   g | _         g | _        || _        |j                  | _        | j                  j	                  d      | _        |j                  | _        || _        g | _        |t        j                  |      nd }t        |j                        D ]~  }t        j                  | j                  |j                  | j                  | j                  |      }t        j                  j!                  |       | j                  j#                  |        y )Nfull_attention)rL   rM   )	key_cachevalue_cacherK   layer_typesindexfirst_attention_layerconv_L_cache_dtype
conv_cachetorchrM   rangenum_hidden_layerszerosr;   _dynamomark_static_addressappend)r?   r/   rK   rL   rM   _
conv_states          r)   r4   zLfm2HybridConvCache.__init__b   s     ,!--%)%5%5%;%;<L%M""//.0)/);f%v//0 		/A##""!!kkJ MM--j9OO"":.		/r(   
key_statesvalue_states	layer_idxcache_kwargsreturnc                 &   |qt        | j                        |k  rt        t        | j                        |      D ]^  }| j                  j                  t	        j
                  g              | j                  j                  t	        j
                  g              ` | j                  j                  |       | j                  j                  |       n| j                  |   j                         s|| j                  |<   || j                  |<   nft	        j                  | j                  |   |gd      | j                  |<   t	        j                  | j                  |   |gd      | j                  |<   | j                  |   | j                  |   fS )a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        dim)	lenrP   rY   r^   rX   tensorrQ   numelcat)r?   ra   rb   rc   rd   r_   s         r)   updatezLfm2HybridConvCache.update   sB   0 !4>>"i/s4>>2I> >ANN))%,,r*:;$$++ELL,<=> %%j1  ''5NN9-335,6y).:  +,1IIt~~i7PR\6]ce,fy).3ii9I9I)9TVb8cik.l  +~~i($*:*:9*EEEr(   beam_idxc                 D   t        t        | j                              D ]  }| j                  |   j                  }| j                  |   j	                  d|j                  |            | j                  |<   | j                  |   j                  }| j                  |   j	                  d|j                  |            | j                  |<   | j                  |   j                  }| j                  |   j	                  d|j                  |            | j                  |<    y)zDReorders the cache for beam search, given the selected beam indices.r   N)rY   rj   rP   rM   index_selecttorQ   rW   )r?   ro   rc   rM   s       r)   reorder_cachez!Lfm2HybridConvCache.reorder_cache   s    s4>>23 	iI^^I.55F(,y(A(N(NqRZR]R]^dRe(fDNN9%%%i077F*.*:*:9*E*R*RSTV^VaVabhVi*jDY'__Y/66F)-)C)P)PQRT\T_T_`fTg)hDOOI&	ir(   c                     | j                   |   dk7  r| j                  n|}t        | j                        |k  s | j                  |   j	                         dk(  ry| j                  |   j
                  d   S )zYReturns the sequence length of the cached states. A layer index can be optionally passed.rO   r   rg   )rR   rT   rj   rP   rl   shaper?   rc   s     r)   get_seq_lengthz"Lfm2HybridConvCache.get_seq_length   sm     372B2B92MQa2aD..gp	t~~)+t~~i/H/N/N/PTU/U~~i(..r22r(   cache_positionc                 V    d}|j                   d   }| j                         }||z   }||fS )aB  
        Return a tuple (kv_length, kv_offset) corresponding to the length and offset that will be returned for
        the given layer at `layer_idx`.
        The masks are then prepared according to the given lengths (kv_length, kv_offset) and patterns (i.e. sliding_window, chunk_size),
        for each layer.
        r   )ru   rw   )r?   rx   rc   full_mask_kv_offsetquery_lengthpast_seen_tokens	kv_lengths          r)   get_mask_sizesz"Lfm2HybridConvCache.get_mask_sizes   s@      %++A...0 #33	---r(   
max_lengthc                    |dk  r| j                         t        |      z
  }| j                         |k  ryt        t        | j                              D ]l  }| j                  |   j                         s!| j                  |   dd|ddf   | j                  |<   | j                  |   dd|ddf   | j                  |<   n y)z"Crop the cache to the given lengthr   N.)rw   absrY   rj   rP   rl   rQ   )r?   r   idxs      r)   cropzLfm2HybridConvCache.crop   s    >,,.Z@J J.T^^,- 	SC~~c"((*&*nnS&9#{
{A:M&Ns#(,(8(8(=c;J;PQ>Q(R  %	Sr(   c                 >    | j                   |   | j                  |   fS rB   )rP   rQ   rv   s     r)   __getitem__zLfm2HybridConvCache.__getitem__   s!    ~~i($*:*:9*EEEr(   c                     t        d      Nz<Lfm2HybridConvCache does not have a legacy cache equivalent.NotImplementedError)r?   s    r)   to_legacy_cachez#Lfm2HybridConvCache.to_legacy_cache   s    !"`aar(   past_key_valuesr	   c                     t        d      r   r   )clsr   s     r)   from_legacy_cachez%Lfm2HybridConvCache.from_legacy_cache   s    !"`aar(   c                     t        t        | j                              D ]  }| j                  |   j                          ! y rB   )rY   rj   rW   zero_rv   s     r)   resetzLfm2HybridConvCache.reset   s4    s4??34 	/IOOI&,,.	/r(   rB   )r   )"r$   r%   r&   __doc__rK   is_compileablerP   rQ   rX   float32r   r7   rL   r   rM   strr4   Tensorr   dictr   tuplern   
LongTensorrs   rw   r~   r   r   r   classmethodFloatTensorr   r   r'   r(   r)   rJ   rJ   S   s    NNIK #]]15// / {{	/
 ellC-./D 26)FLL)F ll)F 	)F
 tCH~.)F 
u||U\\)	*)FV	ie&6&6 	i3 3c 3.U\\ .c .eTWY\T\o .Ss SFS FU5<<3M-N FbuU\\':E%,,<O'O!P b buUEVEV?W9X0Y bes b b/r(   rJ   c                   (    e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   de	ej                     de	e
   de	ej                     d	eej                  e	ej                     e	eej                        f   fd
Z xZS )Lfm2Attentionr/   rc   c                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        t        | j                  |j                        | _        t        | j                  |j                        | _        | `| `y )NFr1   eps)r3   r4   r   r:   r;   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projout_projr"   norm_epsq_layernormk_layernormo_projattention_dropoutr?   r/   rc   r@   s      r)   r4   zLfm2Attention.__init__   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejk		&"<"<t}}"LfN`N`glm&t}}&//J&t}}&//JK"r(   hidden_statesposition_embeddingsattention_maskpast_key_valuerx   re   c                 4   |j                   d d }g |d| j                  }| j                   | j                  |      j                  |       j                  dd      }	| j                   | j                  |      j                  |       j                  dd      }
 | j                  |      j                  | j                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|
||fd| j                   d|\  }} |j"                  g |d j%                         }| j'                  |      }||fS )Nr   r   )sincosrx   eagerg        )dropoutscaling)ru   r   r   r   view	transposer   r   r   r   rn   rc   r   r/   _attn_implementationr   r   reshape
contiguousr   )r?   r   r   r   r   rx   kwargsinput_shapehidden_shapequery_statesra   rb   r   r   rd   attention_interfaceattn_outputattn_weightsoutputs                      r)   rF   zLfm2Attention.forward   s    $))#2.88b8$--8''(GM(B(G(G(VWaabcefg%%&Edkk-&@&E&E|&TU__`acde
6t{{=166EOOPQSTU&S#7jRUWZ#[ j%#&snUL'5'<'<ZW[WeWegs't$J(?;;++w6"9$++:Z:Z"[$7	%
 LL	%
 	%
!\ *k));;;;FFH{+|##r(   r    )r$   r%   r&   r   r7   r4   rX   r   r   r   rJ   r   rF   rG   rH   s   @r)   r   r      s    	#z 	#c 	#  9=59'$||'$ #5<<#=>'$ !.	'$
 !!45'$ !!1!12'$ 
u||Xell3XeELL>Q5RR	S'$r(   r   c            
       r    e Zd Zdedef fdZ	 	 	 ddej                  dee	   deej                     deej                     fdZ	 	 	 ddej                  dee	   deej                     deej                     fd	Z	 	 	 dd
ej                  dee	   deej                     deej                     fdZ xZS )Lfm2ShortConvr/   rc   c           	      2   t         |           || _        || _        |j                  | _        |j                  | _        t        j                  |j                  |j                  | j
                  |j                  | j                  | j
                  dz
        | _        t        j                  |j                  d|j                  z  | j                        | _        t        j                  |j                  |j                  | j                        | _        y )Nr   )in_channelsout_channelskernel_sizegroupsr2   paddingr   r1   )r3   r4   r/   rc   rU   L_cache	conv_biasr2   r   Conv1dr;   convr:   in_projr   r   s      r)   r4   zLfm2ShortConv.__init__  s    
 	"**$$	II**++%%LL1$
	 yy!3!3Q9K9K5KRVR[R[\		&"4"4f6H6HtyyYr(   rE   r   rx   r   c                    t        ||      }| j                  |      j                  dd      }|j                  dd      \  }}}||z  }| j                  j
                  j                  | j                  j
                  j                  d      | j                  j
                  j                  d            }	|c|d   dkD  r[t        |j                  d      |j                  | j                     |	| j                  j                  d       }
|
j                  d      }
n|dt        j                  j!                  || j"                  |j$                  d   z
  df      }|j                  | j                     j'                  |       t)        ||	| j                  j                  d       }
||
z  }| j+                  |j                  dd      j-                               }|S )Nr   rg   r   rh   r   r   )
activation)r   r   r   chunkr   weightr   sizer   squeezerW   rc   r2   	unsqueezer   
functionalpadr   ru   copy_r   r   r   )r?   rE   r   rx   r   BCxBCBxconv_weightsconv_outr`   ys                r)   cuda_kernels_forwardz"Lfm2ShortConv.cuda_kernels_forward4  s    )N;ll1o''B/))A2)&1aUyy'',,TYY-=-=-B-B1-EtyyGWGWG\G\]^G_`%.*;a*?+

2))$..9		H  ))"-H)]]..rDLL288B<4OQR3ST
))$..9??
K'L$))..UYZHLMM!++b"-88:;r(   c                    |j                   d   }t        ||      }| j                  |      j                  dd      }|j	                  dd      \  }}}||z  }	|5|d   dkD  r,|j
                  | j                     }
|j                  d| j                  dz
        }|
j                  dd      }
|	j                  |
j                  |
j                        |
d d d d |f<   |j
                  | j                     j                  |
       t        j                  |
j                  |	j                        | j                   j"                  d d dd d f   z  d      }| j$                  r|| j                   j$                  z  }|j'                  d      }n~|dt(        j*                  j-                  |	| j                  |	j                   d   z
  df      }
|j
                  | j                     j                  |
       | j!                  |	      d	d |f   }||z  }|j                  dd      j/                         }| j1                  |      }|S )
Nr   r   rg   r   rh   r   )shiftsdims)rM   rL   .)ru   r   r   r   r   rW   rc   clampr   rollrr   rM   rL   r   rX   sumr   r   r2   r   r   r   r   r   r   )r?   rE   r   rx   r   seqlenr   r   r   r   r`   r   r   s                r)   slow_forwardzLfm2ShortConv.slow_forwardV  s    (N;ll1o''B/))A2)&1aU%.*;a*?'224>>BJ+11!T\\A5EFN#<J/1uuJ<M<MU_UeUeu/fJq!^+,%%dnn5;;JGyyryy!9DII<L<LQPQSTW<U!U[]^HyyDIINN*))"-H)]]..rDLL288B<4OQR3ST
))$..9??
Kyy}S'6'\2HLKKB**,MM!r(   r   c                     t         rJd|j                  j                  v r2t        j                  j                         s| j                  ||||      S | j                  ||||      S )Ncuda)is_fast_path_availablerM   typerX   r\   is_compilingr   r   )r?   r   r   rx   r   s        r)   rF   zLfm2ShortConv.forward|  s\     "f0D0D0I0I&IRWR_R_RlRlRn,,]NN\jkk  P^__r(   )NNN)r$   r%   r&   r   r7   r4   rX   r   r   rJ   r   r   r   rF   rG   rH   s   @r)   r   r     s    ZZ Z2 9=5915 <<  !!45  !!1!12	 
 !. J 9=5915$<<$ !!45$ !!1!12	$
 !.$R 9=5915	`||	` !!45	` !!1!12		`
 !.	`r(   r   c                       e Zd Zdedef fdZ	 	 	 	 ddej                  deej                  ej                  f   de	ej                     de	ej                     de	eej                        d	e	ej                     d
ej                  fdZ xZS )Lfm2DecoderLayerr/   rc   c                 f   t         |           |j                  |   dk(  | _        | j                  rt	        ||      | _        nt        ||      | _        t        |      | _	        t        |j                  |j                        | _        t        |j                  |j                        | _        y )NrO   r   )r3   r4   rR   is_attention_layerr   	self_attnr   r   r.   feed_forwardr"   r;   r   operator_normffn_normr   s      r)   r4   zLfm2DecoderLayer.__init__  s    "("4"4Y"?CS"S""*69=DN%fi8DI#FO(););Q#F$6$6FOOLr(   r   r   r   position_idsr   rx   re   c           
         |}| j                   r, | j                  d| j                  |      |||||d|\  }}	n$| j                  | j                  |      |||      }||z   }|| j	                  | j                  |            z   }|S )N)r   r   r   r   r   rx   )r   r   rx   r   r'   )r   r   r   r   r   r   )
r?   r   r   r   r   r   rx   r   residualr_   s
             r)   rF   zLfm2DecoderLayer.forward  s     !""-t~~  "00?$7-)--   M1 !II"00?---	 & M &0%(9(9$--:V(WWr(   )NNNN)r$   r%   r&   r   r7   r4   rX   r   r   r   r   rF   rG   rH   s   @r)   r   r     s    
Mz 
Mc 
M  26378<59|| #5<<#=> !.	
 u//0 !u||!45 !!1!12 
r(   r   c                       e Zd ZdZy)Lfm2PreTrainedModelFN)r$   r%   r&   _can_compile_fullgraphr'   r(   r)   r   r     s    "r(   r   c                        e Zd Zdef fdZ	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee	   deej                     dee   d	eej                     d
ee   defdZ xZS )	Lfm2Modelr/   c                     t         |   |       t        |      | _        t	        |j
                  |j                        | _        | `| `	y )Nr   )
r3   r4   r,   pos_embr"   r;   r   embedding_normnorm
rotary_emv)r?   r/   r@   s     r)   r4   zLfm2Model.__init__  s?     *62)&*<*<&//RIOr(   	input_idsr   r   r   inputs_embeds	use_cacherx   r   re   c           
         |d u |d uz  rt        d      || j                  |      }|r>|<|j                  d   }	t        | j                  |	| j
                  | j                        }|F||j                         nd}
t        j                  |
|
|j                  d   z   |j                        }||j                  d      }t        | j                  |||||      }|}| j                  ||      }| j                  d | j                  j                   D ]  } ||f|||||d|} | j                  |      }t!        ||      S )	Nz:You must specify exactly one of input_ids or inputs_embedsr   )r/   rK   rL   rM   r   )rM   )r/   input_embedsr   rx   r   r   )r   r   r   rx   r   )last_hidden_stater   )
ValueErrorembed_tokensru   rJ   r/   rL   rM   rw   rX   aranger   r
   r  layersrZ   r  r   )r?   r  r   r   r   r  r	  rx   r   
batch_sizer|   causal_maskr   r   decoder_layers                  r)   rF   zLfm2Model.forward  s    -t";<YZZ  --i8M0&,,Q/J1{{:TZZX\XcXcO !CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L(;;&))+%
 &"ll=,G "[[)H4;;+H+HI 		M)*).-$7 M		 ++M:&++
 	
r(   )NNNNNNN)r$   r%   r&   r   r4   r   rX   r   r   rJ   r   boolr   r   r   rF   rG   rH   s   @r)   r  r    s    z  1515379=59$(59=
E,,-=
 !.=
 u//0	=

 ""56=
   1 12=
 D>=
 !!1!12=
 +,=
 
!=
r(   r  c                       e Zd Zy)Lfm2ForCausalLMNr#   r'   r(   r)   r  r    r*   r(   r  )r  r  r   )=typingr   r   r   r   rX   torch.nn.functionalr   r   rC   cache_utilsr	   masking_utilsr
   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.import_utilsr   bamba.modeling_bambar   llama.modeling_llamar   r   r   r   r   r   r   r   configuration_lfm2r   causal_conv1dr   r   kernel_modulesallr   
get_loggerr$   loggerr"   r,   Moduler.   rJ   r   r   r   r   r  r  __all__r'   r(   r)   <module>r+     s'   2 1     ' / 9 7 5 & 0 < ?	 	 	 + DD-7** #$89^,  
		H	%	, 		. 	8bii 8(Q/, Q/h3$N 3$lh`BII h`V,1 ,^#. #E

 E
P	& 	 Br(   