
"""PyTorch Visual Attention Network (VAN) model."""

import math
from collections import OrderedDict
from typing import Optional, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ....activations import ACT2FN
from ....modeling_outputs import (
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ....modeling_utils import PreTrainedModel
from ....utils import (
    add_code_sample_docstrings,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
)
from .configuration_van import VanConfig


logger = logging.get_logger(__name__)

# General docstring
_CONFIG_FOR_DOC = "VanConfig"

# Base docstring
_CHECKPOINT_FOR_DOC = "Visual-Attention-Network/van-base"
_EXPECTED_OUTPUT_SHAPE = [1, 512, 7, 7]

# Image classification docstring
_IMAGE_CLASS_CHECKPOINT = "Visual-Attention-Network/van-base"
_IMAGE_CLASS_EXPECTED_OUTPUT = "tabby, tabby cat"


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output
class VanDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)
class VanOverlappingPatchEmbedder(nn.Module):
    """
    Downsamples the input using a patchify operation with a `stride` of 4 by default making adjacent windows overlap by
    half of the area. From [PVTv2: Improved Baselines with Pyramid Vision
    Transformer](https://huggingface.co/papers/2106.13797).
    """

    def __init__(self, in_channels: int, hidden_size: int, patch_size: int = 7, stride: int = 4):
        super().__init__()
        self.convolution = nn.Conv2d(
            in_channels, hidden_size, kernel_size=patch_size, stride=stride, padding=patch_size // 2
        )
        self.normalization = nn.BatchNorm2d(hidden_size)

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        hidden_state = self.convolution(input)
        hidden_state = self.normalization(hidden_state)
        return hidden_state
class VanMlpLayer(nn.Module):
    """
    MLP with depth-wise convolution, from [PVTv2: Improved Baselines with Pyramid Vision
    Transformer](https://huggingface.co/papers/2106.13797).
    """

    def __init__(
        self,
        in_channels: int,
        hidden_size: int,
        out_channels: int,
        hidden_act: str = "gelu",
        dropout_rate: float = 0.5,
    ):
        super().__init__()
        self.in_dense = nn.Conv2d(in_channels, hidden_size, kernel_size=1)
        self.depth_wise = nn.Conv2d(hidden_size, hidden_size, kernel_size=3, padding=1, groups=hidden_size)
        self.activation = ACT2FN[hidden_act]
        self.dropout1 = nn.Dropout(dropout_rate)
        self.out_dense = nn.Conv2d(hidden_size, out_channels, kernel_size=1)
        self.dropout2 = nn.Dropout(dropout_rate)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.in_dense(hidden_state)
        hidden_state = self.depth_wise(hidden_state)
        hidden_state = self.activation(hidden_state)
        hidden_state = self.dropout1(hidden_state)
        hidden_state = self.out_dense(hidden_state)
        hidden_state = self.dropout2(hidden_state)
        return hidden_state
class VanLargeKernelAttention(nn.Module):
    """
    Basic Large Kernel Attention (LKA).
    """

    def __init__(self, hidden_size: int):
        super().__init__()
        self.depth_wise = nn.Conv2d(hidden_size, hidden_size, kernel_size=5, padding=2, groups=hidden_size)
        self.depth_wise_dilated = nn.Conv2d(
            hidden_size, hidden_size, kernel_size=7, dilation=3, padding=9, groups=hidden_size
        )
        self.point_wise = nn.Conv2d(hidden_size, hidden_size, kernel_size=1)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.depth_wise(hidden_state)
        hidden_state = self.depth_wise_dilated(hidden_state)
        hidden_state = self.point_wise(hidden_state)
        return hidden_state
class VanLargeKernelAttentionLayer(nn.Module):
    """
    Computes attention using Large Kernel Attention (LKA) and attends the input.
    """

    def __init__(self, hidden_size: int):
        super().__init__()
        self.attention = VanLargeKernelAttention(hidden_size)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        attention = self.attention(hidden_state)
        attended = hidden_state * attention
        return attended
class VanSpatialAttentionLayer(nn.Module):
    """
    Van spatial attention layer composed by projection (via conv) -> act -> Large Kernel Attention (LKA) attention ->
    projection (via conv) + residual connection.
    """

    def __init__(self, hidden_size: int, hidden_act: str = "gelu"):
        super().__init__()
        self.pre_projection = nn.Sequential(
            OrderedDict(
                [
                    ("conv", nn.Conv2d(hidden_size, hidden_size, kernel_size=1)),
                    ("act", ACT2FN[hidden_act]),
                ]
            )
        )
        self.attention_layer = VanLargeKernelAttentionLayer(hidden_size)
        self.post_projection = nn.Conv2d(hidden_size, hidden_size, kernel_size=1)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        residual = hidden_state
        hidden_state = self.pre_projection(hidden_state)
        hidden_state = self.attention_layer(hidden_state)
        hidden_state = self.post_projection(hidden_state)
        hidden_state = hidden_state + residual
        return hidden_state
class VanLayerScaling(nn.Module):
    """
    Scales the inputs by a learnable parameter initialized by `initial_value`.
    """

    def __init__(self, hidden_size: int, initial_value: float = 1e-2):
        super().__init__()
        self.weight = nn.Parameter(initial_value * torch.ones((hidden_size)), requires_grad=True)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        # unsqueezing for broadcasting
        hidden_state = self.weight.unsqueeze(-1).unsqueeze(-1) * hidden_state
        return hidden_state
class VanLayer(nn.Module):
    """
    Van layer composed by normalization layers, large kernel attention (LKA) and a multi layer perceptron (MLP).
    """

    def __init__(
        self,
        config: VanConfig,
        hidden_size: int,
        mlp_ratio: int = 4,
        drop_path_rate: float = 0.5,
    ):
        super().__init__()
        self.drop_path = VanDropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity()
        self.pre_normomalization = nn.BatchNorm2d(hidden_size)
        self.attention = VanSpatialAttentionLayer(hidden_size, config.hidden_act)
        self.attention_scaling = VanLayerScaling(hidden_size, config.layer_scale_init_value)
        self.post_normalization = nn.BatchNorm2d(hidden_size)
        self.mlp = VanMlpLayer(
            hidden_size, hidden_size * mlp_ratio, hidden_size, config.hidden_act, config.dropout_rate
        )
        self.mlp_scaling = VanLayerScaling(hidden_size, config.layer_scale_init_value)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        residual = hidden_state
        # attention
        hidden_state = self.pre_normomalization(hidden_state)
        hidden_state = self.attention(hidden_state)
        hidden_state = self.attention_scaling(hidden_state)
        hidden_state = self.drop_path(hidden_state)
        # residual connection
        hidden_state = residual + hidden_state
        residual = hidden_state
        # mlp
        hidden_state = self.post_normalization(hidden_state)
        hidden_state = self.mlp(hidden_state)
        hidden_state = self.mlp_scaling(hidden_state)
        hidden_state = self.drop_path(hidden_state)
        # residual connection
        hidden_state = residual + hidden_state
        return hidden_state
class VanStage(nn.Module):
    """
    VanStage, consisting of multiple layers.
    """

    def __init__(
        self,
        config: VanConfig,
        in_channels: int,
        hidden_size: int,
        patch_size: int,
        stride: int,
        depth: int,
        mlp_ratio: int = 4,
        drop_path_rate: float = 0.0,
    ):
        super().__init__()
        self.embeddings = VanOverlappingPatchEmbedder(in_channels, hidden_size, patch_size, stride)
        self.layers = nn.Sequential(
            *[
                VanLayer(
                    config,
                    hidden_size,
                    mlp_ratio=mlp_ratio,
                    drop_path_rate=drop_path_rate,
                )
                for _ in range(depth)
            ]
        )
        self.normalization = nn.LayerNorm(hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_state: torch.Tensor) -> torch.Tensor:
        hidden_state = self.embeddings(hidden_state)
        hidden_state = self.layers(hidden_state)
        # rearrange b c h w -> b (h w) c
        batch_size, hidden_size, height, width = hidden_state.shape
        hidden_state = hidden_state.flatten(2).transpose(1, 2)
        hidden_state = self.normalization(hidden_state)
        # rearrange b (h w) c -> b c h w
        hidden_state = hidden_state.view(batch_size, height, width, hidden_size).permute(0, 3, 1, 2)
        return hidden_state


class VanEncoder(nn.Module):
    """
    VanEncoder, consisting of multiple stages.
    """

    def __init__(self, config: VanConfig):
        super().__init__()
        self.stages = nn.ModuleList([])
        patch_sizes = config.patch_sizes
        strides = config.strides
        hidden_sizes = config.hidden_sizes
        depths = config.depths
        mlp_ratios = config.mlp_ratios
        drop_path_rates = [x.item() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths))]

        for num_stage, (patch_size, stride, hidden_size, depth, mlp_expantion, drop_path_rate) in enumerate(
            zip(patch_sizes, strides, hidden_sizes, depths, mlp_ratios, drop_path_rates)
        ):
            is_first_stage = num_stage == 0
            in_channels = hidden_sizes[num_stage - 1]
            if is_first_stage:
                in_channels = config.num_channels
            self.stages.append(
                VanStage(
                    config,
                    in_channels,
                    hidden_size,
                    patch_size=patch_size,
                    stride=stride,
                    depth=depth,
                    mlp_ratio=mlp_expantion,
                    drop_path_rate=drop_path_rate,
                )
            )

    def forward(
        self,
        hidden_state: torch.Tensor,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[tuple, BaseModelOutputWithNoAttention]:
        all_hidden_states = () if output_hidden_states else None

        for _, stage_module in enumerate(self.stages):
            hidden_state = stage_module(hidden_state)

            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_state,)

        if not return_dict:
            return tuple(v for v in [hidden_state, all_hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(last_hidden_state=hidden_state, hidden_states=all_hidden_states)
class VanPreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config: VanConfig
    base_model_prefix = "van"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            nn.init.trunc_normal_(module.weight, std=self.config.initializer_range)
            if isinstance(module, nn.Linear) and module.bias is not None:
                nn.init.constant_(module.bias, 0)
        elif isinstance(module, nn.LayerNorm):
            nn.init.constant_(module.bias, 0)
            nn.init.constant_(module.weight, 1.0)
        elif isinstance(module, nn.Conv2d):
            fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
            fan_out //= module.groups
            module.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if module.bias is not None:
                module.bias.data.zero_()
VAN_START_DOCSTRING = r"""
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`VanConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

VAN_INPUTS_DOCSTRING = r"""
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConvNextImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all stages. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""


@add_start_docstrings(
    "The bare VAN model outputting raw features without any specific head on top. Note, VAN does not have an embedding"
    " layer.",
    VAN_START_DOCSTRING,
)
class VanModel(VanPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.encoder = VanEncoder(config)
        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)
        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(VAN_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_CHECKPOINT_FOR_DOC,
        output_type=BaseModelOutputWithPoolingAndNoAttention,
        config_class=_CONFIG_FOR_DOC,
        modality="vision",
        expected_output=_EXPECTED_OUTPUT_SHAPE,
    )
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor],
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, BaseModelOutputWithPoolingAndNoAttention]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        encoder_outputs = self.encoder(
            pixel_values,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        last_hidden_state = encoder_outputs[0]
        # global average pooling, n c w h -> n c
        pooled_output = last_hidden_state.mean(dim=[-2, -1])

        if not return_dict:
            return (last_hidden_state, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )
@add_start_docstrings(
    """
    VAN Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """,
    VAN_START_DOCSTRING,
)
class VanForImageClassification(VanPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.van = VanModel(config)
        # Classifier head
        self.classifier = (
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @add_start_docstrings_to_model_forward(VAN_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        checkpoint=_IMAGE_CLASS_CHECKPOINT,
        output_type=ImageClassifierOutputWithNoAttention,
        config_class=_CONFIG_FOR_DOC,
        expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
    )
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, ImageClassifierOutputWithNoAttention]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.van(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.config.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.config.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.config.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


__all__ = ["VanForImageClassification", "VanModel", "VanPreTrainedModel"]
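
# Usage sketch (illustrative only; it mirrors the docstring examples injected by `add_code_sample_docstrings`
# and assumes the "Visual-Attention-Network/van-base" checkpoint referenced above plus a PIL image in `image`):
#
#     from transformers import AutoImageProcessor, VanForImageClassification
#
#     processor = AutoImageProcessor.from_pretrained("Visual-Attention-Network/van-base")
#     model = VanForImageClassification.from_pretrained("Visual-Attention-Network/van-base")
#     inputs = processor(images=image, return_tensors="pt")
#     logits = model(**inputs).logits
#     print(model.config.id2label[logits.argmax(-1).item()])  # e.g. "tabby, tabby cat" for a cat photo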