
    rhe              	          d Z ddlZddlmZ ddlmZmZ ddlZddl	Zddlm
Z
 ddlmZmZmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ  ej6                  e      Ze ed       G d de                    Zd:dej>                  de de!dej>                  fdZ" G d de
jF                        Z$ G d de
jF                        Z% G d de
jF                        Z& G d de
jF                        Z' G d de
jF                        Z( G d  d!e
jF                        Z) G d" d#e
jF                        Z* G d$ d%e
jF                        Z+ G d& d'e
jF                        Z, G d( d)e
jF                        Z- G d* d+e
jF                        Z. G d, d-e
jF                        Z/ G d. d/e
jF                        Z0 G d0 d1e
jF                        Z1e G d2 d3e             Z2e G d4 d5e2             Z3 ed6       G d7 d8e2             Z4g d9Z5y);zPyTorch CvT model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	CvtConfigzV
    Base class for model's outputs, with potential hidden states and attentions.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                  df      ed<   y)BaseModelOutputWithCLSTokenz
    cls_token_value (`torch.FloatTensor` of shape `(batch_size, 1, hidden_size)`):
        Classification token at the output of the last layer of the model.
    Nlast_hidden_statecls_token_value.hidden_states)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   tuple     w/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/cvt/modeling_cvt.pyr   r   #   sS    
 6:x 1 12937OXe//07=AM8E%"3"3S"89:Ar#   r   input	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimr   randr+   r,   floor_div)r%   r&   r'   	keep_probr-   random_tensoroutputs          r$   	drop_pathr5   5   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr#   c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
CvtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr&   r(   c                 0    t         |           || _        y N)super__init__r&   )selfr&   	__class__s     r$   r;   zCvtDropPath.__init__M   s    "r#   r   c                 D    t        || j                  | j                        S r9   )r5   r&   r'   )r<   r   s     r$   forwardzCvtDropPath.forwardQ   s    FFr#   c                      d| j                    S )Nzp=r&   )r<   s    r$   
extra_reprzCvtDropPath.extra_reprT   s    DNN#$$r#   r9   )r   r   r   r   r   floatr;   r   Tensorr?   strrB   __classcell__r=   s   @r$   r7   r7   J   sG    b#(5/ #T #GU\\ Gell G%C %r#   r7   c                   (     e Zd ZdZ fdZd Z xZS )CvtEmbeddingsz'
    Construct the CvT embeddings.
    c                     t         |           t        |||||      | _        t	        j
                  |      | _        y )N)
patch_sizenum_channels	embed_dimstridepadding)r:   r;   CvtConvEmbeddingsconvolution_embeddingsr   Dropoutdropout)r<   rK   rL   rM   rN   rO   dropout_rater=   s          r$   r;   zCvtEmbeddings.__init__]   s:    &7!	Z`jq'
# zz,/r#   c                 J    | j                  |      }| j                  |      }|S r9   )rQ   rS   )r<   pixel_valueshidden_states      r$   r?   zCvtEmbeddings.forwardd   s&    22<@||L1r#   r   r   r   r   r;   r?   rF   rG   s   @r$   rI   rI   X   s    0r#   rI   c                   (     e Zd ZdZ fdZd Z xZS )rP   z"
    Image to Conv Embedding.
    c                     t         |           t        |t        j                  j
                        r|n||f}|| _        t        j                  |||||      | _	        t        j                  |      | _        y )N)kernel_sizerN   rO   )r:   r;   
isinstancecollectionsabcIterablerK   r   Conv2d
projection	LayerNormnormalization)r<   rK   rL   rM   rN   rO   r=   s         r$   r;   zCvtConvEmbeddings.__init__o   sa    #-j+//:R:R#SZZdfpYq
$))L)\blst\\)4r#   c                     | j                  |      }|j                  \  }}}}||z  }|j                  |||      j                  ddd      }| j                  r| j	                  |      }|j                  ddd      j                  ||||      }|S Nr      r   )ra   r-   viewpermuterc   )r<   rV   
batch_sizerL   heightwidthhidden_sizes          r$   r?   zCvtConvEmbeddings.forwardv   s    |42>2D2D/
L&%un#((\;OWWXY[\^_`--l;L#++Aq!499*lTZ\abr#   rX   rG   s   @r$   rP   rP   j   s    5
r#   rP   c                   $     e Zd Z fdZd Z xZS )CvtSelfAttentionConvProjectionc           	          t         |           t        j                  |||||d|      | _        t        j
                  |      | _        y )NF)r[   rO   rN   biasgroups)r:   r;   r   r`   convolutionBatchNorm2drc   )r<   rM   r[   rO   rN   r=   s        r$   r;   z'CvtSelfAttentionConvProjection.__init__   sG    99#
  ^^I6r#   c                 J    | j                  |      }| j                  |      }|S r9   )rr   rc   r<   rW   s     r$   r?   z&CvtSelfAttentionConvProjection.forward   s(    ''5)),7r#   r   r   r   r;   r?   rF   rG   s   @r$   rn   rn      s    7r#   rn   c                       e Zd Zd Zy) CvtSelfAttentionLinearProjectionc                 z    |j                   \  }}}}||z  }|j                  |||      j                  ddd      }|S re   )r-   rg   rh   )r<   rW   ri   rL   rj   rk   rl   s          r$   r?   z(CvtSelfAttentionLinearProjection.forward   sK    2>2D2D/
L&%un#((\;OWWXY[\^_`r#   N)r   r   r   r?   r"   r#   r$   rx   rx      s    r#   rx   c                   &     e Zd Zd fd	Zd Z xZS )CvtSelfAttentionProjectionc                 p    t         |           |dk(  rt        ||||      | _        t	               | _        y )Ndw_bn)r:   r;   rn   convolution_projectionrx   linear_projection)r<   rM   r[   rO   rN   projection_methodr=   s         r$   r;   z#CvtSelfAttentionProjection.__init__   s7    '*HT_ahjp*qD'!A!Cr#   c                 J    | j                  |      }| j                  |      }|S r9   )r~   r   ru   s     r$   r?   z"CvtSelfAttentionProjection.forward   s(    22<@--l;r#   )r}   rv   rG   s   @r$   r{   r{      s    Dr#   r{   c                   .     e Zd Z	 d fd	Zd Zd Z xZS )CvtSelfAttentionc                    t         |           |dz  | _        || _        || _        || _        t        |||||dk(  rdn|      | _        t        |||||      | _        t        |||||      | _	        t        j                  |||	      | _        t        j                  |||	      | _        t        j                  |||	      | _        t        j                  |
      | _        y )Ng      avglinear)r   )rp   )r:   r;   scalewith_cls_tokenrM   	num_headsr{   convolution_projection_queryconvolution_projection_keyconvolution_projection_valuer   Linearprojection_queryprojection_keyprojection_valuerR   rS   )r<   r   rM   r[   	padding_q
padding_kvstride_q	stride_kvqkv_projection_methodqkv_biasattention_drop_rater   kwargsr=   s                r$   r;   zCvtSelfAttention.__init__   s     	_
,"",F*?5*HhNc-
) +E{J	Mb+
' -G{J	Mb-
) !#		)YX N ii	98L "		)YX Nzz"56r#   c                     |j                   \  }}}| j                  | j                  z  }|j                  ||| j                  |      j	                  dddd      S )Nr   rf   r   r
   )r-   rM   r   rg   rh   )r<   rW   ri   rl   _head_dims         r$   "rearrange_for_multi_head_attentionz3CvtSelfAttention.rearrange_for_multi_head_attention   sV    %1%7%7"
K>>T^^3  [$..(S[[\]_`bcefggr#   c                 `   | j                   rt        j                  |d||z  gd      \  }}|j                  \  }}}|j	                  ddd      j                  ||||      }| j                  |      }| j                  |      }	| j                  |      }
| j                   rKt        j                  |	fd      }	t        j                  ||fd      }t        j                  ||
fd      }
| j                  | j                  z  }| j                  | j                  |	            }	| j                  | j                  |            }| j                  | j                  |
            }
t        j                   d|	|g      | j"                  z  }t        j$                  j&                  j)                  |d      }| j+                  |      }t        j                   d||
g      }|j                  \  }}}}|j	                  dddd      j-                         j                  ||| j                  |z        }|S )	Nr   r   rf   dimzbhlk,bhtk->bhltzbhlt,bhtv->bhlvr
   )r   r   splitr-   rh   rg   r   r   r   catrM   r   r   r   r   r   einsumr   r   
functionalsoftmaxrS   
contiguous)r<   rW   rj   rk   	cls_tokenri   rl   rL   keyqueryvaluer   attention_scoreattention_probscontextr   s                   r$   r?   zCvtSelfAttention.forward   s   &+kk,FUN@SUV&W#I|0<0B0B-
K#++Aq!499*lTZ\ab--l;11,?11,?IIy%0a8E))Y,!4CIIy%0a8E>>T^^3778M8Me8TU55d6I6I#6NO778M8Me8TU,,'85#,G$**T((--55o25N,,7,,0?E2JK&}}1k1//!Q1-88:??
KY]YgYgjrYrsr#   T)r   r   r   r;   r   r?   rF   rG   s   @r$   r   r      s     '7Rhr#   r   c                   (     e Zd ZdZ fdZd Z xZS )CvtSelfOutputz
    The residual connection is defined in CvtLayer instead of here (as is the case with other models), due to the
    layernorm applied before each block.
    c                     t         |           t        j                  ||      | _        t        j
                  |      | _        y r9   )r:   r;   r   r   denserR   rS   )r<   rM   	drop_rater=   s      r$   r;   zCvtSelfOutput.__init__  s0    YYy)4
zz),r#   c                 J    | j                  |      }| j                  |      }|S r9   r   rS   r<   rW   input_tensors      r$   r?   zCvtSelfOutput.forward	  s$    zz,/||L1r#   rX   rG   s   @r$   r   r      s    
-
r#   r   c                   .     e Zd Z	 d fd	Zd Zd Z xZS )CvtAttentionc                     t         |           t        |||||||||	|
|      | _        t	        ||      | _        t               | _        y r9   )r:   r;   r   	attentionr   r4   setpruned_heads)r<   r   rM   r[   r   r   r   r   r   r   r   r   r   r=   s                r$   r;   zCvtAttention.__init__  sW     	)!
 $Iy9Er#   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )lenr   r   num_attention_headsattention_head_sizer   r   r   r   r   r4   r   all_head_sizeunion)r<   headsindexs      r$   prune_headszCvtAttention.prune_heads0  s   u:?74>>55t~~7Y7Y[_[l[l
u
  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r#   c                 P    | j                  |||      }| j                  ||      }|S r9   )r   r4   )r<   rW   rj   rk   self_outputattention_outputs         r$   r?   zCvtAttention.forwardB  s+    nn\65A;;{LAr#   r   )r   r   r   r;   r   r?   rF   rG   s   @r$   r   r     s     "@;$ r#   r   c                   $     e Zd Z fdZd Z xZS )CvtIntermediatec                     t         |           t        j                  |t	        ||z              | _        t        j                         | _        y r9   )r:   r;   r   r   intr   GELU
activation)r<   rM   	mlp_ratior=   s      r$   r;   zCvtIntermediate.__init__I  s7    YYy#i).C*DE
'')r#   c                 J    | j                  |      }| j                  |      }|S r9   )r   r   ru   s     r$   r?   zCvtIntermediate.forwardN  s$    zz,/|4r#   rv   rG   s   @r$   r   r   H  s    $
r#   r   c                   $     e Zd Z fdZd Z xZS )	CvtOutputc                     t         |           t        j                  t	        ||z        |      | _        t        j                  |      | _        y r9   )r:   r;   r   r   r   r   rR   rS   )r<   rM   r   r   r=   s       r$   r;   zCvtOutput.__init__U  s:    YYs9y#899E
zz),r#   c                 T    | j                  |      }| j                  |      }||z   }|S r9   r   r   s      r$   r?   zCvtOutput.forwardZ  s.    zz,/||L1#l2r#   rv   rG   s   @r$   r   r   T  s    -
r#   r   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )CvtLayerzb
    CvtLayer composed by attention layers, normalization and multi-layer perceptrons (mlps).
    c                 Z   t         |           t        |||||||||	|
||      | _        t	        ||      | _        t        |||      | _        |dkD  rt        |      nt        j                         | _        t        j                  |      | _        t        j                  |      | _        y )Nr*   rA   )r:   r;   r   r   r   intermediater   r4   r7   r   Identityr5   rb   layernorm_beforelayernorm_after)r<   r   rM   r[   r   r   r   r   r   r   r   r   r   drop_path_rater   r=   s                  r$   r;   zCvtLayer.__init__f  s    " 	%!
 ,IyA	9i@BPSVBV~>\^\g\g\i "Y 7!||I6r#   c                    | j                  | j                  |      ||      }|}| j                  |      }||z   }| j                  |      }| j	                  |      }| j                  ||      }| j                  |      }|S r9   )r   r   r5   r   r   r4   )r<   rW   rj   rk   self_attention_outputr   layer_outputs          r$   r?   zCvtLayer.forward  s     $!!,/!

 1>>*:; (,6 ++L9((6 {{<>~~l3r#   r   rX   rG   s   @r$   r   r   a  s    & %7Nr#   r   c                   $     e Zd Z fdZd Z xZS )CvtStagec                 |   t         |           || _        || _        | j                  j                  | j                     rFt        j                  t        j                  dd| j                  j                  d               | _        t        |j                  | j                     |j                  | j                     | j                  dk(  r|j                  n|j                  | j                  dz
     |j                  | j                     |j                  | j                     |j                  | j                           | _        t        j"                  d|j$                  | j                     |j&                  |   d      D cg c]  }|j)                          }}t        j*                  t-        |j&                  | j                           D cg c]T  }t/        |j0                  | j                     |j                  | j                     |j2                  | j                     |j4                  | j                     |j6                  | j                     |j8                  | j                     |j:                  | j                     |j<                  | j                     |j>                  | j                     |j@                  | j                     |j                  | j                     || j                     |jB                  | j                     |j                  | j                           W c} | _"        y c c}w c c}w )Nr   r   r   )rK   rN   rL   rM   rO   rT   cpu)r,   )r   rM   r[   r   r   r   r   r   r   r   r   r   r   r   )#r:   r;   configstager   r   	Parameterr   randnrM   rI   patch_sizespatch_striderL   patch_paddingr   	embeddinglinspacer   depthitem
Sequentialranger   r   
kernel_qkvr   r   r   r   r   r   r   r   layers)r<   r   r   xdrop_path_ratesr   r=   s         r$   r;   zCvtStage.__init__  s   
;;  ,\\%++aDKK<Q<QRT<U*VWDN&))$**5&&tzz204

a,,VEUEUVZV`V`cdVdEe&&tzz2((4))$**5
 $nnQ0E0Edjj0QSYS_S_`eSfotu
AFFH
 
 mm$ v||DJJ78#" ! $..tzz:$..tzz: & 1 1$** =$..tzz:%00<$..tzz:#__TZZ8*0*F*Ftzz*R#__TZZ8(.(B(B4::(N$..tzz:#24::#>$..tzz:#)#3#3DJJ#?
	

s   L4EL9c                 Z   d }| j                  |      }|j                  \  }}}}|j                  ||||z        j                  ddd      }| j                  j
                  | j                     r6| j
                  j                  |dd      }t        j                  ||fd      }| j                  D ]  } ||||      }|} | j                  j
                  | j                     rt        j                  |d||z  gd      \  }}|j                  ddd      j                  ||||      }||fS )Nr   rf   r   r   r   )r   r-   rg   rh   r   r   r   expandr   r   r   r   )	r<   rW   r   ri   rL   rj   rk   layerlayer_outputss	            r$   r?   zCvtStage.forward  s'   	~~l32>2D2D/
L&%#((\6E>RZZ[\^_abc;;  ,--j"bAI 99i%>AFL[[ 	)E!,>M(L	) ;;  ,&+kk,FUN@SUV&W#I|#++Aq!499*lTZ\abY&&r#   rv   rG   s   @r$   r   r     s    (
T'r#   r   c                   &     e Zd Z fdZddZ xZS )
CvtEncoderc                     t         |           || _        t        j                  g       | _        t        t        |j                              D ]'  }| j
                  j                  t        ||             ) y r9   )r:   r;   r   r   
ModuleListstagesr   r   r   appendr   )r<   r   	stage_idxr=   s      r$   r;   zCvtEncoder.__init__  s[    mmB's6<<01 	<IKKx	:;	<r#   c                     |rdnd }|}d }t        | j                        D ]  \  }} ||      \  }}|s||fz   } |st        d |||fD              S t        |||      S )Nr"   c              3   &   K   | ]	  }||  y wr9   r"   ).0vs     r$   	<genexpr>z%CvtEncoder.forward.<locals>.<genexpr>  s     bqTUTabs   r   r   r   )	enumerater   r!   r   )	r<   rV   output_hidden_statesreturn_dictall_hidden_statesrW   r   r   stage_modules	            r$   r?   zCvtEncoder.forward  s    "6BD#	!*4;;!7 	HA&2<&@#L)#$5$G!	H
 b\9>O$Pbbb**%+
 	
r#   )FTrv   rG   s   @r$   r   r     s    <
r#   r   c                   ,    e Zd ZU eed<   dZdZdgZd Zy)CvtPreTrainedModelr   cvtrV   r   c                 R   t        |t        j                  t        j                  f      rt        j                  j                  |j                  j                  d| j                  j                        |j                  _        |j                  %|j                  j                  j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r~| j                  j                  |j                      rZt        j                  j                  |j                  j                  d| j                  j                        |j                  _        yyy)zInitialize the weightsr*   )meanstdNg      ?)r\   r   r   r`   inittrunc_normal_weightdatar   initializer_rangerp   zero_rb   fill_r   r   r   )r<   modules     r$   _init_weightsz CvtPreTrainedModel._init_weights  s   fryy"))45!#!6!6v}}7I7IPSY]YdYdYvYv!6!wFMM{{&  &&( '-KK""$MM$$S)){{$$V\\2(*(=(=$$))9V9V )> )  % 3 *r#   N)	r   r   r   r   r    base_model_prefixmain_input_name_no_split_modulesr  r"   r#   r$   r  r    s     $O#r#   r  c                        e Zd Zd fd	Zd Ze	 	 	 d	deej                     dee	   dee	   de
eef   fd       Z xZS )
CvtModelc                 r    t         |   |       || _        t        |      | _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r:   r;   r   r   encoder	post_init)r<   r   add_pooling_layerr=   s      r$   r;   zCvtModel.__init__  s/    
 	 !&)r#   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr!  r   r   r   )r<   heads_to_pruner   r   s       r$   _prune_headszCvtModel._prune_heads!  sE    
 +002 	CLE5LLu%//;;EB	Cr#   rV   r  r	  r(   c                    ||n| j                   j                  }||n| j                   j                  }|t        d      | j	                  |||      }|d   }|s	|f|dd  z   S t        ||j                  |j                        S )Nz You have to specify pixel_valuesr  r	  r   r   r  )r   r  use_return_dict
ValueErrorr!  r   r   r   )r<   rV   r  r	  encoder_outputssequence_outputs         r$   r?   zCvtModel.forward)  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@,,!5# ' 

 *!,#%(;;;*-+;;)77
 	
r#   r   )NNN)r   r   r   r;   r'  r   r   r   rD   boolr   r!   r   r?   rF   rG   s   @r$   r  r    so    C  04/3&*	
u||,
 'tn
 d^	

 
u11	2
 
r#   r  z
    Cvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                        e Zd Z fdZe	 	 	 	 ddeej                     deej                     dee   dee   de	e
ef   f
d       Z xZS )	CvtForImageClassificationc                    t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  d         | _        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _        | j                          y )NF)r#  r   r   )r:   r;   
num_labelsr  r  r   rb   rM   	layernormr   r   
classifierr"  )r<   r   r=   s     r$   r;   z"CvtForImageClassification.__init__P  s      ++Fe<f&6&6r&:; CIBSBSVWBWBIIf&&r*F,=,=>]_]h]h]j 	
 	r#   rV   labelsr  r	  r(   c                 b   ||n| j                   j                  }| j                  |||      }|d   }|d   }| j                   j                  d   r| j	                  |      }nI|j
                  \  }}	}
}|j                  ||	|
|z        j                  ddd      }| j	                  |      }|j                  d      }| j                  |      }d}|| j                   j                  | j                   j                  dk(  rd| j                   _
        nv| j                   j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd	| j                   _
        nd
| j                   _
        | j                   j                  dk(  rSt!               }| j                   j                  dk(  r& ||j#                         |j#                               }n |||      }n| j                   j                  d	k(  rGt%               } ||j                  d| j                   j                        |j                  d            }n,| j                   j                  d
k(  rt'               } |||      }|s|f|dd z   }||f|z   S |S t)        |||j*                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr)  r   r   r   rf   r   
regressionsingle_label_classificationmulti_label_classification)losslogitsr   )r   r*  r  r   r3  r-   rg   rh   r  r4  problem_typer2  r+   r   longr   r	   squeezer   r   r   r   )r<   rV   r5  r  r	  outputsr-  r   ri   rL   rj   rk   sequence_output_meanr;  r:  loss_fctr4   s                    r$   r?   z!CvtForImageClassification.forward^  s`    &1%<k$++B]B]((!5#  
 "!*AJ	;;  $"nnY7O6E6K6K3Jfe-22:|VV[^\ddefhiklmO"nn_=O.333:!56{{''/;;))Q./;DKK,[[++a/V\\UZZ5OSYS_S_chclclSl/LDKK,/KDKK,{{''<7"9;;))Q.#FNN$4fnn6FGD#FF3D))-JJ+-B0F0F GUWY))-II,./Y,F)-)9TGf$EvE3f\c\q\qrrr#   )NNNN)r   r   r   r;   r   r   r   rD   r.  r   r!   r   r?   rF   rG   s   @r$   r0  r0  I  s      04)-/3&*<su||,<s &<s 'tn	<s
 d^<s 
u::	;<s <sr#   r0  )r0  r  r  )r*   F)6r   collections.abcr]   dataclassesr   typingr   r   r   torch.utils.checkpointr   torch.nnr   r   r	   modeling_outputsr   r   modeling_utilsr   r   r   utilsr   r   configuration_cvtr   
get_loggerr   loggerr   rD   rC   r.  r5   Moduler7   rI   rP   rn   rx   r{   r   r   r   r   r   r   r   r   r  r  r0  __all__r"   r#   r$   <module>rO     s     ! "    A A Q c c , ( 
		H	% 
B+ B BU\\ e T V[VbVb *%")) %BII $		 2RYY (ryy 
 
Nryy NbBII "6 299 6 r	bii 	
		 
?ryy ?D<'ryy <'~
 
8   , 0
! 0
 0
f Ls 2 LsLs^ Jr#   