
    rhd              	          d Z ddlZddlZddlmZ ddlmZmZ ddlZddl	m
c mZ ddlZddlm
Z
 ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZmZ ddlm Z   ejB                  e"      Z#d-dejH                  de%de&dejH                  fdZ' G d de
jP                        Z) G d de
jP                        Z* G d de
jP                        Z+ G d de
jP                        Z, G d de
jP                        Z- G d de
jP                        Z. G d  d!e
jP                        Z/ G d" d#e
jP                        Z0e G d$ d%e             Z1e G d& d'e1             Z2 ed()       G d* d+e1             Z3g d,Z4y).zPyTorch PVT model.    N)Iterable)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	PvtConfiginput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          w/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr'   *   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
PvtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r&   r.   zPvtDropPath.__init__B   s    "r(   hidden_statesc                 D    t        || j                  | j                        S r,   )r'   r   r   r/   r1   s     r&   forwardzPvtDropPath.forwardF   s    FFr(   c                      d| j                    S )Nzp=)r   )r/   s    r&   
extra_reprzPvtDropPath.extra_reprI   s    DNN#$$r(   r,   )__name__
__module____qualname____doc__r   floatr.   r   Tensorr4   strr6   __classcell__r0   s   @r&   r*   r*   ?   sG    b#(5/ #T #GU\\ Gell G%C %r(   r*   c                        e Zd ZdZ	 ddedeeee   f   deeee   f   dedededef fd	Z	d
e
j                  dedede
j                  fdZde
j                  dee
j                  eef   fdZ xZS )PvtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    config
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                    t         	|           || _        t        |t        j
                  j                        r|n||f}t        |t        j
                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _	        || _
        t        j                  t        j                  d|r|dz   n||            | _        |r*t        j                  t        j                   dd|            nd | _        t        j$                  ||||      | _        t        j(                  ||j*                        | _        t        j.                  |j0                        | _        y )Nr   r   kernel_sizerE   eps)p)r-   r.   rB   
isinstancecollectionsabcr   rC   rD   rF   num_patchesr   	Parameterr   randnposition_embeddingszerosrH   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r/   rB   rC   rD   rE   rF   rG   rH   rR   r0   s
            r&   r.   zPvtPatchEmbeddings.__init__T   s0    	#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&#%<<KKi;?[+V$
  JSekk!Q&DEX\))L+6Zde,,{8M8MNzzF$>$>?r(   
embeddingsheightwidthr   c                    ||z  }t         j                  j                         s<|| j                  j                  | j                  j                  z  k(  r| j
                  S |j                  d||d      j                  dddd      }t        j                  |||fd      }|j                  dd||z        j                  ddd      }|S )Nr   r   r
      bilinear)sizemode)
r   jit
is_tracingrB   rC   rU   reshapepermuteFinterpolate)r/   r_   r`   ra   rR   interpolated_embeddingss         r&   interpolate_pos_encodingz+PvtPatchEmbeddings.interpolate_pos_encodingp   s    un yy##%+9O9ORVR]R]RhRh9h*h+++''65"=EEaAqQ
"#--
&%Wa"b"9"A"A!RRW"X"`"`abdegh"i&&r(   pixel_valuesc                    |j                   \  }}}}|| j                  k7  rt        d      | j                  |      }|j                   ^ }}}|j	                  d      j                  dd      }| j                  |      }| j                  | j                  j                  |dd      }	t        j                  |	|fd      }| j                  | j                  d d dd f   ||      }
t        j                  | j                  d d d df   |
fd      }
n| j                  | j                  ||      }
| j                  ||
z         }|||fS )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rd   r   rc   dim)r   rF   
ValueErrorrX   flatten	transposer[   rH   expandr   catro   rU   r^   )r/   rp   
batch_sizerF   r`   ra   patch_embed_r_   rH   rU   s              r&   r4   zPvtPatchEmbeddings.forward{   sM   2>2D2D/
L&%4,,,w  ool3'--FE!))!,66q!<__[1
>>%--j"bAIIz#:BJ"&"?"?@X@XYZ\]\^Y^@_agin"o"'))T-E-Ea!e-LNa,bhi"j"&"?"?@X@XZ`bg"h\\*/B"BC
65((r(   F)r7   r8   r9   r:   r   r   intr   boolr.   r   r<   ro   tupler4   r>   r?   s   @r&   rA   rA   M   s      @@ #x},-@ #x},-	@
 @ @ @ @8	'5<< 	' 	'UX 	']b]i]i 	')ELL )U5<<c;Q5R )r(   rA   c                   `     e Zd Zdedef fdZdej                  dej                  fdZ xZ	S )PvtSelfOutputrB   rG   c                     t         |           t        j                  ||      | _        t        j
                  |j                        | _        y r,   )r-   r.   r   Lineardenser\   r]   r^   )r/   rB   rG   r0   s      r&   r.   zPvtSelfOutput.__init__   s6    YY{K8
zz&"<"<=r(   r1   r   c                 J    | j                  |      }| j                  |      }|S r,   )r   r^   r3   s     r&   r4   zPvtSelfOutput.forward   s$    

=1]3r(   )
r7   r8   r9   r   r}   r.   r   r<   r4   r>   r?   s   @r&   r   r      s1    >y >s >
U\\ ell r(   r   c                        e Zd ZdZdedededef fdZdedej                  fd	Z
	 ddej                  d
edededeej                     f
dZ xZS )PvtEfficientSelfAttentionzxEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://huggingface.co/papers/2102.12122).rB   rG   num_attention_headssequences_reduction_ratioc                    t         |           || _        || _        | j                  | j                  z  dk7  r&t	        d| j                   d| j                   d      t        | j                  | j                  z        | _        | j                  | j                  z  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  |j                        | _        || _        |dkD  rEt        j$                  ||||      | _        t        j(                  ||j*                        | _        y y )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rJ   rL   )r-   r.   rG   r   rt   r}   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluer\   attention_probs_dropout_probr^   r   rW   sequence_reductionrY   rZ   r[   r/   rB   rG   r   r   r0   s        r&   r.   z"PvtEfficientSelfAttention.__init__   sr    	&#6 d666!;#D$4$4#5 622316 
 $'t'7'7$:R:R'R#S !558P8PPYYt//1C1C&//Z
99T--t/A/AXYYt//1C1C&//Z
zz&"E"EF)B&$q(&(ii[6OXq'D# !ll;F<Q<QRDO	 )r(   r1   r   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  dddd      S )Nrc   r   rd   r   r
   )rf   r   r   viewrk   )r/   r1   	new_shapes      r&   transpose_for_scoresz.PvtEfficientSelfAttention.transpose_for_scores   sT    !&&("-1I1I4KcKc0dd	%**95$$Q1a00r(   r`   ra   output_attentionsc                    | j                  | j                  |            }| j                  dkD  r{|j                  \  }}}|j	                  ddd      j                  ||||      }| j                  |      }|j                  ||d      j	                  ddd      }| j                  |      }| j                  | j                  |            }	| j                  | j                  |            }
t        j                  ||	j                  dd            }|t        j                  | j                        z  }t         j"                  j%                  |d      }| j'                  |      }t        j                  ||
      }|j	                  dddd      j)                         }|j+                         d d | j,                  fz   }|j/                  |      }|r||f}|S |f}|S )Nr   r   rd   rc   rr   r
   )r   r   r   r   rk   rj   r   r[   r   r   r   matmulrv   mathsqrtr   r   
functionalsoftmaxr^   
contiguousrf   r   r   )r/   r1   r`   ra   r   query_layerry   seq_lenrF   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r&   r4   z!PvtEfficientSelfAttention.forward   s    //

=0IJ))A-0=0C0C-J)11!Q:BB:|]cejkM 33MBM)11*lBOWWXY[\^_`M OOM:M--dhh}.EF	//

=0IJ !<<Y5H5HR5PQ+dii8P8P.QQ --//0@b/I ,,7_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]r(   r|   )r7   r8   r9   r:   r   r}   r;   r.   r   r<   r   r~   r   r4   r>   r?   s   @r&   r   r      s     CSS.1SHKShmS:1# 1%,, 1 #(*||* * 	*
  * 
u||	*r(   r   c                        e Zd Zdedededef fdZd Z	 ddej                  ded	ed
e
deej                     f
dZ xZS )PvtAttentionrB   rG   r   r   c                     t         |           t        ||||      | _        t	        ||      | _        t               | _        y )N)rG   r   r   )rG   )r-   r.   r   r/   r   r%   setpruned_headsr   s        r&   r.   zPvtAttention.__init__   sB     	-# 3&?	
	 $FDEr(   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   rr   )lenr   r/   r   r   r   r   r   r   r   r%   r   r   union)r/   headsindexs      r&   prune_headszPvtAttention.prune_heads   s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r(   r1   r`   ra   r   r   c                 h    | j                  ||||      }| j                  |d         }|f|dd  z   }|S )Nr   r   )r/   r%   )r/   r1   r`   ra   r   self_outputsattention_outputr   s           r&   r4   zPvtAttention.forward  sE     yy?PQ;;|A7#%QR(88r(   r|   )r7   r8   r9   r   r}   r;   r.   r   r   r<   r~   r   r4   r>   r?   s   @r&   r   r      sn    "".1"HK"hm";& _d"\\36?BW[	u||	r(   r   c            
       z     e Zd Z	 	 d	dededee   dee   f fdZdej                  dej                  fdZ	 xZ
S )
PvtFFNrB   in_featureshidden_featuresout_featuresc                 j   t         |           ||n|}t        j                  ||      | _        t        |j                  t              rt        |j                     | _	        n|j                  | _	        t        j                  ||      | _
        t        j                  |j                        | _        y r,   )r-   r.   r   r   dense1rO   
hidden_actr=   r   intermediate_act_fndense2r\   r]   r^   )r/   rB   r   r   r   r0   s        r&   r.   zPvtFFN.__init__  s     	'3'?|[ii_=f''-'-f.?.?'@D$'-'8'8D$ii>zz&"<"<=r(   r1   r   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r,   )r   r   r^   r   r3   s     r&   r4   zPvtFFN.forward+  sP    M200?]3M2]3r(   )NN)r7   r8   r9   r   r}   r   r.   r   r<   r4   r>   r?   s   @r&   r   r     sY    
 *.&*>> > "#	>
 sm>"U\\ ell r(   r   c                   f     e Zd Zdedededededef fdZddej                  d	ed
ede	fdZ
 xZS )PvtLayerrB   rG   r   r'   r   	mlp_ratioc                 v   t         |           t        j                  ||j                        | _        t        ||||      | _        |dkD  rt        |      nt        j                         | _
        t        j                  ||j                        | _        t        ||z        }t        |||      | _        y )NrL   )rB   rG   r   r   r   )rB   r   r   )r-   r.   r   rY   rZ   layer_norm_1r   	attentionr*   Identityr'   layer_norm_2r}   r   mlp)	r/   rB   rG   r   r'   r   r   mlp_hidden_sizer0   s	           r&   r.   zPvtLayer.__init__5  s     	LL&:O:OP%# 3&?	
 4=s?Y/LL&:O:OPkI56[Rabr(   r1   r`   ra   r   c                    | j                  | j                  |      |||      }|d   }|dd  }| j                  |      }||z   }| j                  | j	                  |            }| j                  |      }||z   }	|	f|z   }|S )N)r1   r`   ra   r   r   r   )r   r   r'   r   r   )
r/   r1   r`   ra   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r&   r4   zPvtLayer.forwardK  s    !%++M:/	 "0 "
 2!4(,>>*:;(=8XXd//>?
^^J/
$z1/G+r(   r|   )r7   r8   r9   r   r}   r;   r.   r   r<   r~   r4   r>   r?   s   @r&   r   r   4  so    cc c !	c
 c $)c c,U\\ 3 s _c r(   r   c                   x     e Zd Zdef fdZ	 	 	 d	dej                  dee   dee   dee   de	e
ef   f
dZ xZS )

PvtEncoderrB   c                    t         	|           || _        t        j                  d|j
                  t        |j                        d      j                         }g }t        |j                        D ]  }|j                  t        ||dk(  r|j                  n| j                  j                  d|dz   z  z  |j                  |   |j                  |   |dk(  r|j                   n|j"                  |dz
     |j"                  |   ||j                  dz
  k(                t%        j&                  |      | _        g }d}t        |j                        D ]  }g }|dk7  r||j                  |dz
     z  }t        |j                  |         D ]\  }|j                  t+        ||j"                  |   |j,                  |   |||z      |j.                  |   |j0                  |                ^ |j                  t%        j&                  |              t%        j&                  |      | _        t%        j4                  |j"                  d   |j6                  	      | _        y )
Nr   cpu)r   rd   r   )rB   rC   rD   rE   rF   rG   rH   )rB   rG   r   r'   r   r   rc   rL   )r-   r.   rB   r   linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrA   rC   patch_sizesstridesrF   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockrY   rZ   r[   )
r/   rB   drop_path_decaysr_   iblockscurlayersjr0   s
            r&   r.   zPvtEncoder.__init__c  s-    !>>!V-B-BCDV_delln 
v001 	A"!45Fv00@V@V[\abefaf[g@h%11!4!>>!,89Q!4!4FDWDWXY\]X]D^ & 3 3A 66#<#<q#@@
	 !#j 9 v001 	1AFAvv}}QU++6==+, 
%$*$7$7$:,2,F,Fq,I"237";282R2RST2U"("3"3A"6	
 MM"--/0!	1$ ]]6*
 ,,v':':2'>FDYDYZr(   rp   r   output_hidden_statesreturn_dictr   c                 2   |rdnd }|rdnd }|j                   d   }t        | j                        }|}	t        t	        | j
                  | j                              D ]|  \  }
\  }} ||	      \  }	}}|D ]&  } ||	|||      }|d   }	|r	||d   fz   }|s!||	fz   }( |
|dz
  k7  sI|	j                  |||d      j                  dddd      j                         }	~ | j                  |	      }	|r||	fz   }|st        d |	||fD              S t        |	||      S )	N r   r   rc   r
   rd   c              3   &   K   | ]	  }||  y wr,   r   ).0vs     r&   	<genexpr>z%PvtEncoder.forward.<locals>.<genexpr>  s     mq_`_lms   last_hidden_stater1   
attentions)r   r   r   	enumeratezipr   rj   rk   r   r[   r   r   )r/   rp   r   r   r   all_hidden_statesall_self_attentionsry   
num_blocksr1   idxembedding_layerblock_layerr`   ra   r   layer_outputss                    r&   r4   zPvtEncoder.forward  si    #7BD$5b4!''*
_
$3<SAVAVX\XbXb=c3d 	v/C//;+:=+I(M65$ M %mVUDU V -a 0$*=qAQ@S*S''(9]<L(L%M j1n$ - 5 5j&%QS T \ \]^`acdfg h s s u	v 6 1]4D Dm]4EGZ$[mmm++*
 	
r(   )FFT)r7   r8   r9   r   r.   r   FloatTensorr   r~   r   r   r   r4   r>   r?   s   @r&   r   r   b  sn    0[y 0[j -2/4&*#
''#
 $D>#
 'tn	#

 d^#
 
uo%	&#
r(   r   c                   H    e Zd ZU eed<   dZdZg Zdej                  ddfdZ
y)PvtPreTrainedModelrB   pvtrp   moduler   Nc                 l   | j                   j                  }t        |t        j                  t        j
                  f      rht        j                  j                  |j                  j                  d|       |j                  %|j                  j                  j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              rt        j                  j                  |j                  j                  d|      |j                  _	        |j                   Ft        j                  j                  |j                   j                  d|      |j                   _	        yyy)zInitialize the weightsr   )meanstdNg      ?)rB   initializer_rangerO   r   r   rW   inittrunc_normal_weightdatar   zero_rY   fill_rA   rU   rH   )r/   r  r  s      r&   _init_weightsz PvtPreTrainedModel._init_weights  s@   kk++fryy"))45 GG!!&--"4"43C!H{{&  &&( '-KK""$MM$$S) 23.0gg.C.C**// /D /F&&+
 +(*(=(=$$)) )> )  % , 4r(   )r7   r8   r9   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   Moduler  r   r(   r&   r  r    s0    $OBII $ r(   r  c                        e Zd Zdef fdZd Ze	 	 	 d
dej                  de	e
   de	e
   de	e
   deeef   f
d	       Z xZS )PvtModelrB   c                 r    t         |   |       || _        t        |      | _        | j                          y r,   )r-   r.   rB   r   encoder	post_initr/   rB   r0   s     r&   r.   zPvtModel.__init__  s1      "&) 	r(   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  layerr   r   )r/   heads_to_pruner  r   s       r&   _prune_headszPvtModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr(   rp   r   r   r   r   c                 ,   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  ||||      }|d   }|s	|f|dd  z   S t        ||j                  |j                        S )Nrp   r   r   r   r   r   r   )rB   r   r   use_return_dictr  r   r1   r   )r/   rp   r   r   r   encoder_outputssequence_outputs          r&   r4   zPvtModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B],,%/!5#	 ' 
 *!,#%(;;;-)77&11
 	
r(   )NNN)r7   r8   r9   r   r.   r   r   r   r  r   r~   r   r   r   r4   r>   r?   s   @r&   r  r    s    y C  -1/3&*
''
 $D>
 'tn	

 d^
 
uo%	&
 
r(   r  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                        e Zd Zdeddf fdZe	 	 	 	 ddeej                     deej                     dee	   dee	   d	ee	   de
eef   fd
       Z xZS )PvtForImageClassificationrB   r   Nc                 0   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _	        | j                          y )Nr   rc   )r-   r.   
num_labelsr  r  r   r   r   r   
classifierr  r  s     r&   r.   z"PvtForImageClassification.__init__  sy      ++F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r(   rp   labelsr   r   r   c                 (   ||n| j                   j                  }| j                  ||||      }|d   }| j                  |dddddf         }d}	|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }
| j
                  dk(  r& |
|j                         |j                               }	n |
||      }	n| j                   j                  dk(  r=t               }
 |
|j                  d| j
                        |j                  d            }	n,| j                   j                  dk(  rt               }
 |
||      }	|s|f|dd z   }|	|	f|z   S |S t        |	||j                   |j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr"  r   r   
regressionsingle_label_classificationmulti_label_classificationrc   )losslogitsr1   r   )rB   r#  r  r+  problem_typer*  r   r   longr}   r	   squeezer   r   r   r   r1   r   )r/   rp   r,  r   r   r   r   r%  r2  r1  loss_fctr%   s               r&   r4   z!PvtForImageClassification.forward%  s    &1%<k$++B]B]((%/!5#	  
 "!*Aq!9:{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r(   )NNNN)r7   r8   r9   r   r.   r   r   r   r<   r~   r   r   r   r4   r>   r?   s   @r&   r(  r(    s    y T   *.,0/3&*;
u||,;
 &;
 $D>	;

 'tn;
 d^;
 
u++	,;
 ;
r(   r(  )r(  r  r  )r   F)5r:   rP   r   collections.abcr   typingr   r   r   torch.nn.functionalr   r   rl   torch.utils.checkpointtorch.nnr   r   r	   activationsr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_pvtr   
get_loggerr7   loggerr<   r;   r~   r'   r  r*   rA   r   r   r   r   r   r   r  r  r(  __all__r   r(   r&   <module>rE     ss  "    $ "      A A ! F - Q , ( 
		H	%U\\ e T V[VbVb *%")) %A) A)H	BII 	O		 Od'299 'TRYY 6+ryy +\V
 V
r   @ 0
! 0
 0
f K
 2 K
K
\ Jr(   