
    rh{              	          d Z ddlZddlmZ ddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZmZmZmZmZ ddlmZ ddl m!Z!  e       r	ddl"m#Z#m$Z$ nd Z$d Z# ejJ                  e&      Z'e ed       G d de                    Z(e ed       G d de                    Z)e ed       G d de                    Z* G d de	jV                        Z, G d d e	jV                        Z- G d! d"e	jV                        Z.dEd#ej^                  d$e0d%e1d&ej^                  fd'Z2 G d( d)e	jV                        Z3 G d* d+e	jV                        Z4 G d, d-e	jV                        Z5 G d. d/e	jV                        Z6 G d0 d1e	jV                        Z7 G d2 d3e	jV                        Z8 G d4 d5e	jV                        Z9 G d6 d7e	jV                        Z: G d8 d9e	jV                        Z;e G d: d;e             Z<e G d< d=e<             Z= ed>       G d? d@e<             Z> edA       G dB dCe<e             Z?g dDZ@y)Fz9PyTorch Dilated Neighborhood Attention Transformer model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BackboneOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputOptionalDependencyNotAvailableauto_docstringis_natten_availableloggingrequires_backends)BackboneMixin   )DinatConfig)
natten2davnatten2dqkrpbc                      t               Nr   argskwargss     {/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/dinat/modeling_dinat.pyr   r   .       ,..    c                      t               r   r   r   s     r!   r   r   1   r"   r#   zO
    Dinat encoder's outputs, with potential hidden states and attentions.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	ee
ej                  df      ed<   dZee
ej                  df      ed<   dZee
ej                  df      ed<   y)DinatEncoderOutputa  
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nlast_hidden_state.hidden_states
attentionsreshaped_hidden_states)__name__
__module____qualname____doc__r(   r   torchFloatTensor__annotations__r)   tupler*   r+    r#   r!   r'   r'   ;   s}     6:x 1 129=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr#   r'   zW
    Dinat model's outputs that also contains a pooling of the last hidden states.
    c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                  df      ed<   dZeeej                  df      ed<   dZeeej                  df      ed<   y)	DinatModelOutputa  
    pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
        Average pooling of the last layer hidden-state.
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nr(   pooler_output.r)   r*   r+   )r,   r-   r.   r/   r(   r   r0   r1   r2   r7   r)   r3   r*   r+   r4   r#   r!   r6   r6   Q   s    	 6:x 1 12915M8E--.5=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr#   r6   z1
    Dinat outputs for image classification.
    c                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                  df      ed<   dZeeej                  df      ed<   dZeeej                  df      ed<   y)	DinatImageClassifierOutputa7  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification (or regression if config.num_labels==1) loss.
    logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Classification (or regression if config.num_labels==1) scores (before SoftMax).
    reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, hidden_size, height, width)`.

        Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
        include the spatial dimensions.
    Nlosslogits.r)   r*   r+   )r,   r-   r.   r/   r:   r   r0   r1   r2   r;   r)   r3   r*   r+   r4   r#   r!   r9   r9   j   s     )-D(5$$
%,*.FHU&&'.=AM8E%"3"3S"89:A:>Ju00#567>FJHU5+<+<c+A%BCJr#   r9   c                   f     e Zd ZdZ fdZdeej                     deej                     fdZ
 xZS )DinatEmbeddingsz6
    Construct the patch and position embeddings.
    c                     t         |           t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                        | _
        y r   )super__init__DinatPatchEmbeddingspatch_embeddingsr   	LayerNorm	embed_dimnormDropouthidden_dropout_probdropoutselfconfig	__class__s     r!   r@   zDinatEmbeddings.__init__   sG     4V <LL!1!12	zz&"<"<=r#   pixel_valuesreturnc                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )rB   rE   rH   )rJ   rM   
embeddingss      r!   forwardzDinatEmbeddings.forward   s4    **<8
YYz*
\\*-
r#   )r,   r-   r.   r/   r@   r   r0   r1   r3   TensorrQ   __classcell__rL   s   @r!   r=   r=      s4    >HU->->$? E%,,DW r#   r=   c                   `     e Zd ZdZ fdZdeej                     dej                  fdZ	 xZ
S )rA   z
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
    Transformer.
    c           
      P   t         |           |j                  }|j                  |j                  }}|| _        |dk(  rnt        d      t        j                  t        j                  | j                  |dz  ddd      t        j                  |dz  |ddd            | _	        y )N   z2Dinat only supports patch size of 4 at the moment.   r
   r
   rX   rX   r   r   )kernel_sizestridepadding)
r?   r@   
patch_sizenum_channelsrD   
ValueErrorr   
SequentialConv2d
projection)rJ   rK   r_   r`   hidden_sizerL   s        r!   r@   zDinatPatchEmbeddings.__init__   s    &&
$*$7$79I9Ik(? QRR--IId'')9vV\flmIIkQ&PV`fg
r#   rM   rN   c                     |j                   \  }}}}|| j                  k7  rt        d      | j                  |      }|j	                  dddd      }|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   rX   r
   r   )shaper`   ra   rd   permute)rJ   rM   _r`   heightwidthrP   s          r!   rQ   zDinatPatchEmbeddings.forward   s`    )5););&<4,,,w  __\2
''1a3
r#   )r,   r-   r.   r/   r@   r   r0   r1   rR   rQ   rS   rT   s   @r!   rA   rA      s/    
"	HU->->$? 	ELL 	r#   rA   c                        e Zd ZdZej
                  fdedej                  ddf fdZde	j                  de	j                  fdZ xZS )	DinatDownsamplerz
    Convolutional Downsampling Layer.

    Args:
        dim (`int`):
            Number of input channels.
        norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
            Normalization layer class.
    dim
norm_layerrN   Nc                     t         |           || _        t        j                  |d|z  dddd      | _         |d|z        | _        y )NrX   rY   rZ   r[   F)r\   r]   r^   bias)r?   r@   rn   r   rc   	reductionrE   )rJ   rn   ro   rL   s      r!   r@   zDinatDownsampler.__init__   sE    3CVF\binoq3w'	r#   input_featurec                     | j                  |j                  dddd            j                  dddd      }| j                  |      }|S )Nr   r
   r   rX   )rr   rh   rE   )rJ   rs   s     r!   rQ   zDinatDownsampler.forward   sJ    }'<'<Q1a'HIQQRSUVXY[\]		-0r#   )r,   r-   r.   r/   r   rC   intModuler@   r0   rR   rQ   rS   rT   s   @r!   rm   rm      sJ     :< (C (RYY ($ (U\\ ell r#   rm   input	drop_probtrainingrN   c                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
            r   r   )r   )dtypedevice)rg   ndimr0   randr|   r}   floor_div)rw   rx   ry   	keep_probrg   random_tensoroutputs          r!   	drop_pathr      s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr#   c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
DinatDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nrx   rN   c                 0    t         |           || _        y r   )r?   r@   rx   )rJ   rx   rL   s     r!   r@   zDinatDropPath.__init__   s    "r#   r)   c                 D    t        || j                  | j                        S r   )r   rx   ry   rJ   r)   s     r!   rQ   zDinatDropPath.forward   s    FFr#   c                      d| j                    S )Nzp=)rx   rJ   s    r!   
extra_reprzDinatDropPath.extra_repr   s    DNN#$$r#   r   )r,   r-   r.   r/   r   floatr@   r0   rR   rQ   strr   rS   rT   s   @r!   r   r      sG    b#(5/ #T #GU\\ Gell G%C %r#   r   c                   j     e Zd Z fdZ	 ddej
                  dee   deej
                     fdZ	 xZ
S )NeighborhoodAttentionc                 *   t         |           ||z  dk7  rt        d| d| d      || _        t	        ||z        | _        | j                  | j
                  z  | _        || _        || _        t        j                  t        j                  |d| j                  z  dz
  d| j                  z  dz
              | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _        t        j&                  |j(                        | _        y )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()rX   r   )rq   )r?   r@   ra   num_attention_headsru   attention_head_sizeall_head_sizer\   dilationr   	Parameterr0   zerosrpbLinearqkv_biasquerykeyvaluerF   attention_probs_dropout_probrH   rJ   rK   rn   	num_headsr\   r   rL   s         r!   r@   zNeighborhoodAttention.__init__   sD   ?a#C5(^_h^iijk  $- #&sY#7 !558P8PP&  <<ID<L<L8Lq8PTUX\XhXhThklTl noYYt1143E3EFOO\
99T//1C1C&//ZYYt1143E3EFOO\
zz&"E"EFr#   r)   output_attentionsrN   c                    |j                   \  }}}| j                  |      j                  |d| j                  | j                        j                  dd      }| j                  |      j                  |d| j                  | j                        j                  dd      }| j                  |      j                  |d| j                  | j                        j                  dd      }|t        j                  | j                        z  }t        ||| j                  | j                  | j                        }	t        j                  j!                  |	d      }
| j#                  |
      }
t%        |
|| j                  | j                        }|j'                  ddddd      j)                         }|j+                         d d | j,                  fz   }|j                  |      }|r||
f}|S |f}|S )	Nr   rX   rn   r   r
   rW   )rg   r   viewr   r   	transposer   r   mathsqrtr   r   r\   r   r   
functionalsoftmaxrH   r   rh   
contiguoussizer   )rJ   r)   r   
batch_size
seq_lengthri   query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                 r!   rQ   zNeighborhoodAttention.forward  s   
 %2$7$7!
JJJ}%T*b$":":D<T<TUYq!_ 	 HH]#T*b$":":D<T<TUYq!_ 	 JJ}%T*b$":":D<T<TUYq!_ 	 "DIId.F.F$GG )i4K[K[]a]j]jk --//0@b/I ,,7"?KAQAQSWS`S`a%--aAq!<GGI"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]r#   Fr,   r-   r.   r@   r0   rR   r   boolr3   rQ   rS   rT   s   @r!   r   r      s@    G2 -2,||, $D>, 
u||		,r#   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )NeighborhoodAttentionOutputc                     t         |           t        j                  ||      | _        t        j
                  |j                        | _        y r   )r?   r@   r   r   denserF   r   rH   rJ   rK   rn   rL   s      r!   r@   z$NeighborhoodAttentionOutput.__init__A  s6    YYsC(
zz&"E"EFr#   r)   input_tensorrN   c                 J    | j                  |      }| j                  |      }|S r   r   rH   )rJ   r)   r   s      r!   rQ   z#NeighborhoodAttentionOutput.forwardF  s$    

=1]3r#   r,   r-   r.   r@   r0   rR   rQ   rS   rT   s   @r!   r   r   @  s2    G
U\\  RWR^R^ r#   r   c                   p     e Zd Z fdZd Z	 ddej                  dee   de	ej                     fdZ
 xZS )NeighborhoodAttentionModulec                     t         |           t        |||||      | _        t	        ||      | _        t               | _        y r   )r?   r@   r   rJ   r   r   setpruned_headsr   s         r!   r@   z$NeighborhoodAttentionModule.__init__N  s:    )&#y+xX	1&#>Er#   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )lenr   rJ   r   r   r   r   r   r   r   r   r   r   union)rJ   headsindexs      r!   prune_headsz'NeighborhoodAttentionModule.prune_headsT  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r#   r)   r   rN   c                 f    | j                  ||      }| j                  |d   |      }|f|dd  z   }|S Nr   r   )rJ   r   )rJ   r)   r   self_outputsattention_outputr   s         r!   rQ   z#NeighborhoodAttentionModule.forwardf  sC    
 yy0AB;;|AF#%QR(88r#   r   )r,   r-   r.   r@   r   r0   rR   r   r   r3   rQ   rS   rT   s   @r!   r   r   M  sD    ";* -2|| $D> 
u||		r#   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )DinatIntermediatec                    t         |           t        j                  |t	        |j
                  |z              | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r?   r@   r   r   ru   	mlp_ratior   
isinstance
hidden_actr   r   intermediate_act_fnr   s      r!   r@   zDinatIntermediate.__init__r  sa    YYsC(8(83(>$?@
f''-'-f.?.?'@D$'-'8'8D$r#   r)   rN   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     r!   rQ   zDinatIntermediate.forwardz  s&    

=100?r#   r   rT   s   @r!   r   r   q  s#    9U\\ ell r#   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )DinatOutputc                     t         |           t        j                  t	        |j
                  |z        |      | _        t        j                  |j                        | _	        y r   )
r?   r@   r   r   ru   r   r   rF   rG   rH   r   s      r!   r@   zDinatOutput.__init__  sF    YYs6#3#3c#9:C@
zz&"<"<=r#   r)   rN   c                 J    | j                  |      }| j                  |      }|S r   r   r   s     r!   rQ   zDinatOutput.forward  s$    

=1]3r#   r   rT   s   @r!   r   r     s#    >
U\\ ell r#   r   c            	            e Zd Zd fd	Zd Z	 ddej                  dee   de	ej                  ej                  f   fdZ
 xZS )	
DinatLayerc                    t         |           |j                  | _        |j                  | _        || _        | j                  | j                  z  | _        t        j                  ||j                        | _	        t        |||| j                  | j                        | _        |dkD  rt        |      nt        j                         | _        t        j                  ||j                        | _        t!        ||      | _        t%        ||      | _        |j(                  dkD  r?t        j*                  |j(                  t-        j.                  d|f      z  d      | _        y d | _        y )Neps)r\   r   r{   r   rX   T)requires_grad)r?   r@   chunk_size_feed_forwardr\   r   window_sizer   rC   layer_norm_epslayernorm_beforer   	attentionr   Identityr   layernorm_afterr   intermediater   r   layer_scale_init_valuer   r0   oneslayer_scale_parameters)rJ   rK   rn   r   r   drop_path_raterL   s         r!   r@   zDinatLayer.__init__  s(   '-'E'E$!-- ++dmm; "Sf6K6K L4C0@0@4==
 ;I3:N~6TVT_T_Ta!||CV5J5JK-fc:!&#. ,,q0 LL66QH9MM]ab 	#  	#r#   c                     | j                   }d}||k  s||k  rJdx}}t        d||z
        }t        d||z
        }	dd||||	f}t        j                  j	                  ||      }||fS )N)r   r   r   r   r   r   r   )r   maxr   r   pad)
rJ   r)   rj   rk   r   
pad_valuespad_lpad_tpad_rpad_bs
             r!   	maybe_padzDinatLayer.maybe_pad  s    &&'
K5;#6EE;./E;/0EQueU;JMM--mZHMj((r#   r)   r   rN   c                    |j                         \  }}}}|}| j                  |      }| j                  |||      \  }}|j                  \  }	}
}}	| j	                  ||      }|d   }|d   dkD  xs |d   dkD  }|r|d d d |d |d d f   j                         }| j                  | j                  d   |z  }|| j                  |      z   }| j                  |      }| j                  | j                  |            }| j                  | j                  d   |z  }|| j                  |      z   }|r	||d   f}|S |f}|S )N)r   r   r
      r   )r   r   r   rg   r   r   r   r   r   r   r   )rJ   r)   r   r   rj   rk   channelsshortcutr   ri   
height_pad	width_padattention_outputsr   
was_paddedlayer_outputlayer_outputss                    r!   rQ   zDinatLayer.forward  s|   
 /<.@.@.B+
FE8 --m<$(NN=&%$P!z&3&9&9#:y! NN=L]N^,Q/]Q&;*Q-!*;
/7F7FUFA0EFQQS&&2#::1=@PP 4>>2B#CC++M:{{4#4#4\#BC&&266q9LHL$t~~l'CC@Q'8';< YeWfr#   )r{   r   )r,   r-   r.   r@   r   r0   rR   r   r   r3   rQ   rS   rT   s   @r!   r   r     sM    
(	) -2$||$ $D>$ 
u||U\\)	*	$r#   r   c                   j     e Zd Z fdZ	 ddej
                  dee   deej
                     fdZ	 xZ
S )
DinatStagec                 <   t         	|           || _        || _        t	        j
                  t        |      D cg c]  }t        |||||   ||          c}      | _        |% ||t        j                        | _
        d| _        y d | _
        d| _        y c c}w )N)rK   rn   r   r   r   )rn   ro   F)r?   r@   rK   rn   r   
ModuleListranger   layersrC   
downsamplepointing)
rJ   rK   rn   depthr   	dilationsr   r  irL   s
            r!   r@   zDinatStage.__init__  s    mm u	  !'&q\#1!#4	
 !(SR\\JDO  #DO%	s   Br)   r   rN   c                     |j                         \  }}}}t        | j                        D ]  \  }} |||      }|d   } |}	| j                  | j                  |	      }||	f}
|r|
dd  z  }
|
S r   )r   	enumerater  r  )rJ   r)   r   ri   rj   rk   r  layer_moduler  !hidden_states_before_downsamplingstage_outputss              r!   rQ   zDinatStage.forward  s    
 ,00265!(5 	-OA|(8IJM)!,M	- -:)??& OO,MNM&(IJ]12..Mr#   r   r   rT   s   @r!   r  r    s?    8 -2|| $D> 
u||		r#   r  c                   ~     e Zd Z fdZ	 	 	 	 d	dej
                  dee   dee   dee   dee   dee	e
f   fdZ xZS )
DinatEncoderc                    t         |           t        |j                        | _        || _        t        j                  d|j                  t        |j                        d      D cg c]  }|j                          }}t        j                  t        | j                        D cg c]  }t        |t        |j                   d|z  z        |j                  |   |j"                  |   |j$                  |   |t        |j                  d |       t        |j                  d |dz           || j                  dz
  k  rt&        nd        c}      | _        y c c}w c c}w )Nr   cpu)r}   rX   r   )rK   rn   r  r   r  r   r  )r?   r@   r   depths
num_levelsrK   r0   linspacer   sumitemr   r	  r
  r  ru   rD   r   r  rm   levels)rJ   rK   xdpri_layerrL   s        r!   r@   zDinatEncoder.__init__  s,   fmm,!&63H3H#fmmJ\ej!klAqvvxllmm  %T__5  !F,,q'z9: --0$..w7$..w7#&s6=='+B'Cc&--XeZadeZeJfFg#h4;dooPQ>Q4Q/X\
 ms   )E(B#Er)   r   output_hidden_states(output_hidden_states_before_downsamplingreturn_dictrN   c                    |rdnd }|rdnd }|rdnd }|r |j                  dddd      }	||fz  }||	fz  }t        | j                        D ]l  \  }
} |||      }|d   }|d   }|r#|r!|j                  dddd      }	||fz  }||	fz  }n$|r"|s |j                  dddd      }	||fz  }||	fz  }|se||dd  z  }n |st        d |||fD              S t	        ||||      S )Nr4   r   r
   r   rX   c              3   &   K   | ]	  }||  y wr   r4   ).0vs     r!   	<genexpr>z'DinatEncoder.forward.<locals>.<genexpr>>  s     mq_`_lms   )r(   r)   r*   r+   )rh   r  r  r3   r'   )rJ   r)   r   r#  r$  r%  all_hidden_statesall_reshaped_hidden_statesall_self_attentionsreshaped_hidden_stater  r  r  r  s                 r!   rQ   zDinatEncoder.forward  s]    #7BD+?RT"$5b4$1$9$9!Q1$E!-!11&+@*BB&(5 	9OA|(8IJM)!,M0=a0@-#(P(I(Q(QRSUVXY[\(]%!&G%II!*/D.FF*%.V(5(=(=aAq(I%!m%55!*/D.FF* #}QR'88#%	9( m]4EGZ$[mmm!++*#=	
 	
r#   )FFFT)r,   r-   r.   r@   r0   rR   r   r   r   r3   r'   rQ   rS   rT   s   @r!   r  r    st    
. -2/4CH&*.
||.
 $D>.
 'tn	.

 3;4..
 d^.
 
u((	).
r#   r  c                   &    e Zd ZU eed<   dZdZd Zy)DinatPreTrainedModelrK   dinatrM   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                        rJ|j                  j
                  j                          |j                  j
                  j                  d       yy)zInitialize the weightsr{   )meanstdNg      ?)r   r   r   rc   weightdatanormal_rK   initializer_rangerq   zero_rC   fill_)rJ   modules     r!   _init_weightsz"DinatPreTrainedModel._init_weightsN  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( '-KK""$MM$$S) .r#   N)r,   r-   r.   r   r2   base_model_prefixmain_input_namer<  r4   r#   r!   r0  r0  H  s    $O
*r#   r0  c                        e Zd Zd
 fd	Zd Zd Ze	 	 	 	 ddeej                     dee
   dee
   dee
   deeef   f
d	       Z xZS )
DinatModelc                    t         |   |       t        | dg       || _        t	        |j
                        | _        t        |j                  d| j                  dz
  z  z        | _	        t        |      | _        t        |      | _        t        j                  | j                  |j                         | _        |rt        j$                  d      nd| _        | j)                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        nattenrX   r   r   N)r?   r@   r   rK   r   r  r  ru   rD   num_featuresr=   rP   r  encoderr   rC   r   	layernormAdaptiveAvgPool1dpooler	post_init)rJ   rK   add_pooling_layerrL   s      r!   r@   zDinatModel.__init__]  s    
 	 $
+fmm, 0 0119L3M MN)&1#F+d&7&7V=R=RS1Bb**1- 	r#   c                 .    | j                   j                  S r   rP   rB   r   s    r!   get_input_embeddingszDinatModel.get_input_embeddingss      ///r#   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrD  layerr   r   )rJ   heads_to_prunerP  r   s       r!   _prune_headszDinatModel._prune_headsv  sE    
 +002 	CLE5LLu%//;;EB	Cr#   rM   r   r#  r%  rN   c                 R   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|t	        d      | j                  |      }| j                  ||||      }|d   }| j                  |      }d }| j                  G| j                  |j                  dd      j                  dd            }t        j                  |d      }|s||f|dd  z   }	|	S t        |||j                  |j                  |j                        S )Nz You have to specify pixel_valuesr   r#  r%  r   r   rX   )r(   r7   r)   r*   r+   )rK   r   r#  use_return_dictra   rP   rD  rE  rG  flattenr   r0   r6   r)   r*   r+   )
rJ   rM   r   r#  r%  embedding_outputencoder_outputssequence_outputpooled_outputr   s
             r!   rQ   zDinatModel.forward~  sA    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@??<8,,/!5#	 ' 
 *!,..9;;" KK(?(?1(E(O(OPQST(UVM!MM-;M%}58KKFM-')77&11#2#I#I
 	
r#   )T)NNNN)r,   r-   r.   r@   rL  rR  r   r   r0   r1   r   r   r3   r6   rQ   rS   rT   s   @r!   r@  r@  [  s    ,0C  59,0/3&*,
u001,
 $D>,
 'tn	,

 d^,
 
u&&	',
 ,
r#   r@  z
    Dinat Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    c                        e Zd Z fdZe	 	 	 	 	 d	deej                     deej                     dee	   dee	   dee	   de
eef   fd       Z xZS )
DinatForImageClassificationc                 X   t         |   |       t        | dg       |j                  | _        t	        |      | _        |j                  dkD  r4t        j                  | j
                  j                  |j                        nt        j                         | _
        | j                          y )NrB  r   )r?   r@   r   
num_labelsr@  r1  r   r   rC  r   
classifierrH  rI   s     r!   r@   z$DinatForImageClassification.__init__  s     $
+ ++'
 FLEVEVYZEZBIIdjj--v/@/@A`b`k`k`m 	
 	r#   rM   labelsr   r#  r%  rN   c                 *   ||n| j                   j                  }| j                  ||||      }|d   }| j                  |      }d}	|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }
| j
                  dk(  r& |
|j                         |j                               }	n |
||      }	n| j                   j                  dk(  r=t               }
 |
|j                  d| j
                        |j                  d            }	n,| j                   j                  dk(  rt               }
 |
||      }	|s|f|dd z   }|	|	f|z   S |S t        |	||j                   |j"                  |j$                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        NrT  r   
regressionsingle_label_classificationmulti_label_classificationr   rX   )r:   r;   r)   r*   r+   )rK   rU  r1  r_  problem_typer^  r|   r0   longru   r	   squeezer   r   r   r9   r)   r*   r+   )rJ   rM   r`  r   r#  r%  r   rZ  r;   r:   loss_fctr   s               r!   rQ   z#DinatForImageClassification.forward  s    &1%<k$++B]B]**/!5#	  
  
/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE)!//))#*#A#A
 	
r#   )NNNNN)r,   r-   r.   r@   r   r   r0   r1   
LongTensorr   r   r3   r9   rQ   rS   rT   s   @r!   r\  r\    s       59-1,0/3&*<
u001<
 ))*<
 $D>	<

 'tn<
 d^<
 
u00	1<
 <
r#   r\  zL
    NAT backbone, to be used with frameworks like DETR and MaskFormer.
    c                   x     e Zd Z fdZd Ze	 	 	 d	dej                  dee	   dee	   dee	   de
f
d       Z xZS )
DinatBackbonec           	      .   t         |   |       t         | 	  |       t        | dg       t	        |      | _        t        |      | _        |j                  gt        t        |j                              D cg c]  }t        |j                  d|z  z         c}z   | _        i }t        | j                  | j                         D ]  \  }}t#        j$                  |      ||<    t#        j&                  |      | _        | j+                          y c c}w )NrB  rX   )r?   r@   _init_backboner   r=   rP   r  rD  rD   r
  r   r  ru   rC  zip_out_featuresr   r   rC   
ModuleDicthidden_states_normsrH  )rJ   rK   r  rq  stager`   rL   s         r!   r@   zDinatBackbone.__init__  s     v&$
+)&1#F+#--.X]^abhbobo^pXq1rST#f6F6FA6M2N1rr !#&t'9'94==#I 	DE<)+l)C&	D#%==1D#E  	 2ss   9"Dc                 .    | j                   j                  S r   rK  r   s    r!   rL  z"DinatBackbone.get_input_embeddings  rM  r#   rM   r#  r   r%  rN   c                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  |      }| j                  ||ddd      }|j                  }d}t        | j                  |      D ]  \  }	}
|	| j                  v s|
j                  \  }}}}|
j                  dddd      j                         }
|
j                  |||z  |      }
 | j                  |	   |
      }
|
j                  ||||      }
|
j                  dddd      j                         }
||
fz  } |s|f}|r||j                  fz  }|S t!        ||r|j                  nd|j"                  	      S )
a/  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
        >>> model = AutoBackbone.from_pretrained(
        ...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 512, 7, 7]
        ```NT)r   r#  r$  r%  r4   r   rX   r
   r   )feature_mapsr)   r*   )rK   rU  r#  r   rP   rD  r+   rn  stage_namesout_featuresrg   rh   r   r   rq  r)   r   r*   )rJ   rM   r#  r   r%  rW  r   r)   ru  rr  hidden_stater   r`   rj   rk   r   s                   r!   rQ   zDinatBackbone.forward!  s   B &1%<k$++B]B]$8$D $++JjJj 	 2C1N-TXT_T_TqTq??<8,,/!%59  
  66#&t'7'7#G 	0E<))):F:L:L7
L&%+33Aq!Q?JJL+00Ve^\Z>t77>|L+00VULY+33Aq!Q?JJL/	0 "_F#70022M%3G'//T))
 	
r#   )NNN)r,   r-   r.   r@   rL  r   r0   rR   r   r   r   rQ   rS   rT   s   @r!   rk  rk    ss    &0  04,0&*G
llG
 'tnG
 $D>	G

 d^G
 
G
 G
r#   rk  )r\  r@  r0  rk  )r{   F)Ar/   r   dataclassesr   typingr   r   r0   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_outputsr   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   r   r   r   utils.backbone_utilsr   configuration_dinatr   natten.functionalr   r   
get_loggerr,   loggerr'   r6   r9   rv   r=   rA   rm   rR   r   r   r   r   r   r   r   r   r   r   r  r  r0  r@  r\  rk  __all__r4   r#   r!   <module>r     s   @  ! "    A A ! . - Q  2 , ;;// 
		H	% 
K K K  
K{ K K& 
K K K*bii ,!299 !Hryy 0U\\ e T V[VbVb *%BII %CBII CL
")) 
!")) !H		 	")) 	D DN, ,^C
299 C
L *? * *$ O
% O
 O
d N
"6 N
N
b 
_
(- _

_
D ar#   