
    rhR              	          d Z ddlmZmZ ddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ  ej6                  e      Zd-dej<                  dede dej<                  fdZ! G d dejD                        Z# G d dejD                        Z$ G d dejD                        Z% G d dejD                        Z& G d dejD                        Z' G d dejD                        Z( G d d ejD                        Z)e G d! d"e             Z*e G d# d$e*             Z+ ed%&       G d' d(e*             Z, ed)&       G d* d+e*e             Z-g d,Z.y).zPyTorch ConvNextV2 model.    )OptionalUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)BackboneMixin   )ConvNextV2Configinput	drop_probtrainingreturnc                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/convnextv2/modeling_convnextv2.py	drop_pathr'   )   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
ConvNextV2DropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     r&   r.   zConvNextV2DropPath.__init__A   s    "r(   hidden_statesc                 D    t        || j                  | j                        S r,   )r'   r   r   r/   r1   s     r&   forwardzConvNextV2DropPath.forwardE   s    FFr(   c                      d| j                    S )Nzp=)r   )r/   s    r&   
extra_reprzConvNextV2DropPath.extra_reprH   s    DNN#$$r(   r,   )__name__
__module____qualname____doc__r   floatr.   r   Tensorr4   strr6   __classcell__r0   s   @r&   r*   r*   >   sG    b#(5/ #T #GU\\ Gell G%C %r(   r*   c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )ConvNextV2GRNz)GRN (Global Response Normalization) layerdimc                     t         |           t        j                  t	        j
                  ddd|            | _        t        j                  t	        j
                  ddd|            | _        y )Nr   )r-   r.   r   	Parameterr   zerosweightbias)r/   rB   r0   s     r&   r.   zConvNextV2GRN.__init__O   sL    ll5;;q!Q#<=LLQ1c!:;	r(   r1   r   c                     t         j                  j                  |ddd      }||j                  dd      dz   z  }| j                  ||z  z  | j
                  z   |z   }|S )N   )r   rI   T)ordrB   keepdim)rB   rK   ư>)r   linalgvector_normmeanrF   rG   )r/   r1   global_featuresnorm_featuress       r&   r4   zConvNextV2GRN.forwardT   si    ,,22=aV]a2b'?+?+?BPT+?+UX\+\]}}'DE		QTaar(   )
r7   r8   r9   r:   intr.   r   FloatTensorr4   r>   r?   s   @r&   rA   rA   L   s1    3<C <
U%6%6 5;L;L r(   rA   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZS )ConvNextV2LayerNormaA  LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    c                 N   t         |           t        j                  t	        j
                  |            | _        t        j                  t	        j                  |            | _        || _	        || _
        | j                  dvrt        d| j                         |f| _        y )N)channels_lastchannels_firstzUnsupported data format: )r-   r.   r   rD   r   onesrF   rE   rG   epsdata_formatNotImplementedErrornormalized_shape)r/   r^   r[   r\   r0   s       r&   r.   zConvNextV2LayerNorm.__init__d   s    ll5::.>#?@LL-=!>?	&#FF%(A$BRBRAS&TUU!1 3r(   xr   c                 d   | j                   dk(  rWt        j                  j                  j	                  || j
                  | j                  | j                  | j                        }|S | j                   dk(  r|j                  }|j                         }|j                  dd      }||z
  j                  d      j                  dd      }||z
  t        j                  || j                  z         z  }|j                  |      }| j                  d d d d f   |z  | j                  d d d d f   z   }|S )NrX   rY   r   T)rK   rI   )r   )r\   r   r   
functional
layer_normr^   rF   rG   r[   r   r;   rP   powsqrtto)r/   r_   input_dtypeuss        r&   r4   zConvNextV2LayerNorm.forwardn   s
   .##..q$2G2GVZV_V_aeaiaijA  !11''K	Aq$'AQA##At#4AQ%**Q\22A;'AAtTM*Q.1dD=1IIAr(   )rM   rX   )	r7   r8   r9   r:   r.   r   r<   r4   r>   r?   s   @r&   rV   rV   ^   s(    
4 %,, r(   rV   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZ	S )ConvNextV2EmbeddingszThis class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    c                    t         |           t        j                  |j                  |j
                  d   |j                  |j                        | _        t        |j
                  d   dd      | _	        |j                  | _        y )Nr   kernel_sizestriderM   rY   r[   r\   )
r-   r.   r   Conv2dnum_channelshidden_sizes
patch_sizepatch_embeddingsrV   	layernormr/   configr0   s     r&   r.   zConvNextV2Embeddings.__init__   sr     "		!4!4Q!7VEVEV_e_p_p!
 -V-@-@-C[kl"//r(   pixel_valuesr   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   rq   
ValueErrorrt   ru   )r/   rx   rq   
embeddingss       r&   r4   zConvNextV2Embeddings.forward   sV    #))!,4,,,w  **<8
^^J/
r(   
r7   r8   r9   r:   r.   r   rT   r<   r4   r>   r?   s   @r&   rj   rj   }   s*    0E$5$5 %,, r(   rj   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZ	S )ConvNextV2Layera5  This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: [DwConv, LayerNorm (channels_first), Conv, GELU,1x1 Conv]; all in (N, C,
    H, W) (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear]; Permute back

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    c                    t         |           t        j                  ||dd|      | _        t        |d      | _        t        j                  |d|z        | _        t        |j                     | _        t        d|z        | _        t        j                  d|z  |      | _        |dkD  rt        |      | _        y t        j                          | _        y )N   r	   )rm   paddinggroupsrM   r[      r   )r-   r.   r   rp   dwconvrV   ru   Linearpwconv1r
   
hidden_actactrA   grnpwconv2r*   Identityr'   )r/   rw   rB   r'   r0   s       r&   r.   zConvNextV2Layer.__init__   s    iiSa3O,Sd;yya#g.&++, S)yyS#.:Cc/+I6r{{}r(   r1   r   c                 N   |}| j                  |      }|j                  dddd      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }| j                  |      }|j                  dddd      }|| j                  |      z   }|S )Nr   rI   r	   r   )r   permuteru   r   r   r   r   r'   )r/   r1   r   r_   s       r&   r4   zConvNextV2Layer.forward   s    KK&IIaAq!NN1LLOHHQKHHQKLLOIIaAq!DNN1%%r(   )r   r|   r?   s   @r&   r~   r~      s+    
]U%6%6 5<< r(   r~   c                   \     e Zd ZdZd fd	Zdej                  dej                  fdZ xZ	S )ConvNextV2Stagea  ConvNeXTV2 stage, consisting of an optional downsampling layer + multiple residual blocks.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        in_channels (`int`): Number of input channels.
        out_channels (`int`): Number of output channels.
        depth (`int`): Number of residual blocks.
        drop_path_rates(`list[float]`): Stochastic depth rates for each layer.
    c                 ~   t         	|           ||k7  s|dkD  r?t        j                  t	        |dd      t        j
                  ||||            | _        nt        j                         | _        |xs dg|z  }t        j                  t        |      D cg c]  }t        ||||          c} | _
        y c c}w )Nr   rM   rY   ro   rl   r   )rB   r'   )r-   r.   r   
SequentialrV   rp   downsampling_layerr   ranger~   layers)
r/   rw   in_channelsout_channelsrm   rn   depthdrop_path_ratesjr0   s
            r&   r.   zConvNextV2Stage.__init__   s    ,&&1*&(mm#KTGWX		+|U[\'D#
 ')kkmD#):cUU]mm_dej_klZ[of,/RSBTUl
ls   B:r1   r   c                 J    | j                  |      }| j                  |      }|S r,   )r   r   r3   s     r&   r4   zConvNextV2Stage.forward   s&    //>M2r(   )rI   rI   rI   Nr|   r?   s   @r&   r   r      s*    
U%6%6 5<< r(   r   c                   f     e Zd Z fdZ	 	 ddej
                  dee   dee   dee	e
f   fdZ xZS )ConvNextV2Encoderc           
      ,   t         |           t        j                         | _        t        j                  d|j                  t        |j                        d      j                  |j                        D cg c]  }|j                          }}|j                  d   }t        |j                        D ]V  }|j                  |   }t        ||||dkD  rdnd|j                  |   ||         }| j                  j!                  |       |}X y c c}w )Nr   cpu)r   rI   r   )r   r   rn   r   r   )r-   r.   r   
ModuleListstagesr   linspacedrop_path_ratesumdepthssplittolistrr   r   
num_stagesr   append)	r/   rw   r_   r   prev_chsiout_chsstager0   s	           r&   r.   zConvNextV2Encoder.__init__   s    mmo ^^Av'<'<c&-->PY^_eeflfsfst
 HHJ
 
 &&q)v(() 	A))!,G#$$EqqmmA& / 2E KKu%H	
s   :Dr1   output_hidden_statesreturn_dictr   c                     |rdnd }t        | j                        D ]  \  }}|r||fz   } ||      } |r||fz   }|st        d ||fD              S t        ||      S )N c              3   &   K   | ]	  }||  y wr,   r   ).0vs     r&   	<genexpr>z,ConvNextV2Encoder.forward.<locals>.<genexpr>
  s     Xq!-Xs   )last_hidden_stater1   )	enumerater   tupler   )r/   r1   r   r   all_hidden_statesr   layer_modules          r&   r4   zConvNextV2Encoder.forward   s     #7BD(5 	8OA|#$58H$H!(7M		8   1]4D DX]4E$FXXX-++
 	
r(   )FT)r7   r8   r9   r.   r   rT   r   boolr   r   r   r4   r>   r?   s   @r&   r   r      sT    0 05&*	
((
 'tn
 d^	

 
u44	5
r(   r   c                   ,    e Zd ZU eed<   dZdZdgZd Zy)ConvNextV2PreTrainedModelrw   
convnextv2rx   r~   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                  t        f      rJ|j                  j
                  j                          |j                  j
                  j                  d       yt        |t              rI|j                  j
                  j                          |j                  j
                  j                          yy)zInitialize the weightsr   )rP   stdNg      ?)
isinstancer   r   rp   rF   datanormal_rw   initializer_rangerG   zero_	LayerNormrV   fill_rA   )r/   modules     r&   _init_weightsz'ConvNextV2PreTrainedModel._init_weights  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( '/B CDKK""$MM$$S).MM$$&KK""$ /r(   N)	r7   r8   r9   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   r   r(   r&   r   r     s!    $$O*+%r(   r   c                   x     e Zd Z fdZe	 	 	 ddeej                     dee   dee   de	e
ef   fd       Z xZS )ConvNextV2Modelc                     t         |   |       || _        t        |      | _        t        |      | _        t        j                  |j                  d   |j                        | _        | j                          y )NrL   r   )r-   r.   rw   rj   r{   r   encoderr   r   rr   layer_norm_epsru   	post_initrv   s     r&   r.   zConvNextV2Model.__init__,  s`     .v6(0 f&9&9"&=6CXCXY 	r(   rx   r   r   r   c                 d   ||n| j                   j                  }||n| j                   j                  }|t        d      | j	                  |      }| j                  |||      }|d   }| j                  |j                  ddg            }|s
||f|dd  z   S t        |||j                        S )Nz You have to specify pixel_valuesr   r   r   rL   r   )r   pooler_outputr1   )
rw   r   use_return_dictrz   r{   r   ru   rP   r   r1   )r/   rx   r   r   embedding_outputencoder_outputsr   pooled_outputs           r&   r4   zConvNextV2Model.forward9  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@??<8,,!5# ' 
 ,A. '8'='=r2h'GH%}58KKK7/')77
 	
r(   )NNN)r7   r8   r9   r.   r   r   r   rT   r   r   r   r   r4   r>   r?   s   @r&   r   r   )  sk      59/3&*	"
u001"
 'tn"
 d^	"

 
u>>	?"
 "
r(   r   z
    ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                        e Zd Z fdZe	 	 	 	 ddeej                     deej                     dee	   dee	   de
eef   f
d       Z xZS )	 ConvNextV2ForImageClassificationc                 0   t         |   |       |j                  | _        t        |      | _        |j                  dkD  r-t        j                  |j                  d   |j                        nt        j                         | _	        | j                          y )Nr   rL   )r-   r.   
num_labelsr   r   r   r   rr   r   
classifierr   rv   s     r&   r.   z)ConvNextV2ForImageClassification.__init__g  sy      ++)&1 FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r(   rx   labelsr   r   r   c                    ||n| j                   j                  }| j                  |||      }|r|j                  n|d   }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }	| j                  dk(  r& |	|j                         |j                               }n |	||      }n| j                   j
                  dk(  r=t               }	 |	|j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               }	 |	||      }|s|f|dd z   }
||f|
z   S |
S t!        |||j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   
regressionsingle_label_classificationmulti_label_classificationrL   rI   )losslogitsr1   )rw   r   r   r   r   problem_typer   r   r   longrS   r   squeezer   viewr   r   r1   )r/   rx   r   r   r   outputsr   r   r   loss_fctr%   s              r&   r4   z(ConvNextV2ForImageClassification.forwardu  s    &1%<k$++B]B]//,EYgr/s1<--'!*/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE3!//
 	
r(   )NNNN)r7   r8   r9   r.   r   r   r   rT   
LongTensorr   r   r   r   r4   r>   r?   s   @r&   r   r   _  s      59-1/3&*3
u0013
 ))*3
 'tn	3

 d^3
 
u::	;3
 3
r(   r   zT
    ConvNeXT V2 backbone, to be used with frameworks like DETR and MaskFormer.
    c            
       f     e Zd Z fdZe	 	 ddej                  dee   dee   de	fd       Z
 xZS )ConvNextV2Backbonec                    t         |   |       t         | 	  |       t        |      | _        t        |      | _        |j                  d   g|j                  z   | _        i }t        | j                  | j                        D ]  \  }}t        |d      ||<    t        j                  |      | _        | j!                          y )Nr   rY   )r\   )r-   r.   _init_backbonerj   r{   r   r   rr   num_featureszip_out_featureschannelsrV   r   
ModuleDicthidden_states_normsr   )r/   rw   r   r   rq   r0   s        r&   r.   zConvNextV2Backbone.__init__  s     v&.v6(0#0034v7J7JJ !#&t'9'94==#I 	iE<)<\Wg)h&	i#%==1D#E  	r(   rx   r   r   r   c                    ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |d|      }|r|j
                  n|d   }d}t        | j                  |      D ]/  \  }}	|| j                  v s | j                  |   |	      }	||	fz  }1 |s|f}
|r|
|fz  }
|
S t        ||r|d      S dd      S )ar  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnextv2-tiny-1k-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```NTr   r   r   )feature_mapsr1   
attentions)rw   r   r   r{   r   r1   r   stage_namesout_featuresr   r   )r/   rx   r   r   r   r   r1   r   r   hidden_stater%   s              r&   r4   zConvNextV2Backbone.forward  s   2 &1%<k$++B]B]$8$D $++JjJj 	  ??<8,,!%#  
 2=--'!*#&t'7'7#G 	0E<)))>t77>|L/	0
 "_F#=**M%+?-
 	
EI
 	
r(   )NN)r7   r8   r9   r.   r   r   r<   r   r   r   r4   r>   r?   s   @r&   r   r     sV    "  04&*	7
ll7
 'tn7
 d^	7

 
7
 7
r(   r   )r   r   r   r   )r   F)/r:   typingr   r   r   torch.utils.checkpointr   torch.nnr   r   r   activationsr
   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   utils.backbone_utilsr   configuration_convnextv2r   
get_loggerr7   loggerr<   r;   r   r'   Moduler*   rA   rV   rj   r~   r   r   r   r   r   r   __all__r   r(   r&   <module>r     s     "    A A !  . , 1 6 
		H	%U\\ e T V[VbVb *% %BII $")) >299 0(bii (Xbii B-
		 -
` % % %, 1
/ 1
 1
h C
'@ C
C
L J
2M J
J
Z ur(   