
    rh                    ~   d Z ddlmZ ddlZddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZmZ ddlmZmZmZmZmZ dd	lmZmZ dd
lmZ ddlmZ  ej<                  e      Z dZ!dZ"g dZ#dZ$dZ%d@dAdZ& G d dejN                  jP                        Z) G d dejN                  jP                        Z* G d dejN                  jP                        Z+ G d dejN                  jP                        Z, G d dejN                  jP                        Z- G d dejN                  jP                        Z. G d dejN                  jP                        Z/ G d  d!ejN                  jP                        Z0 G d" d#ejN                  jP                        Z1 G d$ d%ejN                  jP                        Z2 G d& d'ejN                  jP                        Z3 G d( d)ejN                  jP                        Z4e G d* d+ejN                  jP                               Z5 G d, d-e      Z6d.Z7d/Z8 e	d0e7       G d1 d2e6             Z9 e	d3e7       G d4 d5e6e             Z: G d6 d7ejN                  jP                        Z; G d8 d9ejN                  jP                        Z< G d: d;ejN                  jP                        Z= e	d<e7       G d= d>e6             Z>g d?Z?y)BzTensorFlow 2.0 MobileViT model.    )annotationsN   )get_tf_activation)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings)TFBaseModelOutputTFBaseModelOutputWithPooling&TFImageClassifierOutputWithNoAttention(TFSemanticSegmenterOutputWithNoAttention)TFPreTrainedModelTFSequenceClassificationLosskeraskeras_serializableunpack_inputs)
shape_liststable_softmax)logging   )MobileViTConfigr   zapple/mobilevit-small)r   i     r   ztabby, tabby catc                |    ||}t        |t        | |dz  z         |z  |z        }|d| z  k  r||z  }t        |      S )a  
    Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
    original TensorFlow repo. It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
       g?)maxint)valuedivisor	min_value	new_values       /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/mobilevit/modeling_tf_mobilevit.pymake_divisibler"   >   sS     	Is57Q;#677BWLMI3;W	y>    c                  j     e Zd Z	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZdddZddZ xZS )	TFMobileViTConvLayerc           
        t        |   di | t        j                  d| j                  j
                   d       t        |dz
  dz        |z  }t        j                  j                  |      | _
        ||z  dk7  rt        d| d| d      t        j                  j                  |||d	|||d
      | _        |	r(t        j                  j                  ddd      | _        nd | _        |
rht!        |
t"              rt%        |
      | _        nNt!        |j(                  t"              rt%        |j(                        | _        n|j(                  | _        nd | _        || _        || _        y )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPUr   r   r   zOutput channels (z) are not divisible by z groups.VALIDconvolution)filterskernel_sizestridespaddingdilation_rategroupsuse_biasnamegh㈵>g?normalization)epsilonmomentumr1    )super__init__loggerwarning	__class____name__r   r   layersZeroPadding2Dr-   
ValueErrorConv2Dr)   BatchNormalizationr2   
isinstancestrr   
activation
hidden_actin_channelsout_channels)selfconfigrE   rF   r+   strider/   biasdilationuse_normalizationuse_activationkwargsr-   r:   s                r!   r7   zTFMobileViTConvLayer.__init__N   s[    	"6"(() *E E	

 {Q!+,x7||11':& A%0>UV\U]]efgg <<.. #" / 	
 !&!@!@X[bq!@!rD!%D.#."3N"CF--s3"3F4E4E"F"("3"3"DO&(r#   c                    | j                  |      }| j                  |      }| j                  | j                  ||      }| j                  | j                  |      }|S Ntraining)r-   r)   r2   rC   )rG   featuresrR   padded_featuress       r!   callzTFMobileViTConvLayer.call   s^    ,,x0##O4)))(X)FH??&x0Hr#   c                ,   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d d | j                  g       d d d        t        | dd       st        | j                  d      r\t        j                  | j                  j
                        5  | j                  j                  d d d | j                  g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTr)   r2   r1   )builtgetattrtf
name_scoper)   r1   buildrE   hasattrr2   rF   rG   input_shapes     r!   r[   zTFMobileViTConvLayer.build   s    ::
4-9t//445 M  &&dD$:J:J'KLM4$/;t))62]]4#5#5#:#:; T&&,,dD$@Q@Q-RST T 3 <M MT Ts   *C>	*D
>D
D)r   r   Fr   TT)rH   r   rE   r   rF   r   r+   r   rI   r   r/   r   rJ   boolrK   r   rL   r_   rM   z
bool | strreturnNoneFrS   	tf.TensorrR   r_   r`   rd   Nr;   
__module____qualname__r7   rU   r[   __classcell__r:   s   @r!   r%   r%   M   s     "&%)4)4) 4) 	4)
 4) 4) 4) 4) 4)  4) #4) 
4)l
Tr#   r%   c                  P     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 d fdZdddZd	dZ xZS )
TFMobileViTInvertedResidualzY
    Inverted residual block (MobileNetv2): https://huggingface.co/papers/1801.04381
    c           
     H   t        |   di | t        t        t	        ||j
                  z              d      }|dvrt        d| d      |dk(  xr ||k(  | _        t        |||dd      | _	        t        |||d|||d	
      | _
        t        |||ddd      | _        y )Nr   )r   r   zInvalid stride .r   
expand_1x1rE   rF   r+   r1   r   conv_3x3)rE   rF   r+   rI   r/   rK   r1   F
reduce_1x1rE   rF   r+   rM   r1   r5   )r6   r7   r"   r   roundexpand_ratior>   use_residualr%   ro   rq   rr   )	rG   rH   rE   rF   rI   rK   rN   expanded_channelsr:   s	           r!   r7   z$TFMobileViTInvertedResidual.__init__   s     	"6"*3u[6CVCV5V/W+XZ[\vha899#q[K{l/J.:KYZam
 -)*$	
 /)% 
r#   c                    |}| j                  ||      }| j                  ||      }| j                  ||      }| j                  r||z   S |S rP   )ro   rq   rr   rv   )rG   rS   rR   residuals       r!   rU   z TFMobileViTInvertedResidual.call   sU    ??8h??==H==??8h??&*&7&7x("EXEr#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTro   rq   rr   )	rW   rX   rY   rZ   ro   r1   r[   rq   rr   r]   s     r!   r[   z!TFMobileViTInvertedResidual.build   s   ::
4t,8t334 ,%%d+,4T*6t}}112 *##D)*4t,8t334 ,%%d+, , 9, ,* *, ,s$   D%%D1?D=%D.1D:=Er   )rH   r   rE   r   rF   r   rI   r   rK   r   r`   ra   rb   rc   re   r;   rg   rh   __doc__r7   rU   r[   ri   rj   s   @r!   rl   rl      sP    
 jk!
%!
47!
GJ!
TW!
cf!
	!
FF,r#   rl   c                  N     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZdddZddZ xZS )	TFMobileViTMobileNetLayerc           	         t        	|   di | g | _        t        |      D ]9  }t	        ||||dk(  r|ndd|       }| j                  j                  |       |}; y )Nr   r   layer.)rE   rF   rI   r1   r5   )r6   r7   r<   rangerl   append)
rG   rH   rE   rF   rI   
num_stagesrN   ilayerr:   s
            r!   r7   z"TFMobileViTMobileNetLayer.__init__   sp     	"6"z" 		'A/')!"avQaS\E KKu%&K		'r#   c                <    | j                   D ]  } |||      } |S rP   r<   )rG   rS   rR   layer_modules       r!   rU   zTFMobileViTMobileNetLayer.call   s(     KK 	AL#Hx@H	Ar#   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wNTr<   rW   rX   r<   rY   rZ   r1   r[   rG   r^   r   s      r!   r[   zTFMobileViTMobileNetLayer.build   t    ::
44(4 $ -]]<#4#45 - &&t,- -- 5- -   A..A7	)r   r   )rH   r   rE   r   rF   r   rI   r   r   r   r`   ra   rb   rc   re   rf   rj   s   @r!   r   r      sT     '' ' 	'
 ' ' 
'.
-r#   r   c                  :     e Zd Zd fdZddZdddZd	dZ xZS )
TFMobileViTSelfAttentionc                   t        |   d
i | ||j                  z  dk7  rt        d| d|j                   d      |j                  | _        t	        ||j                  z        | _        | j                  | j
                  z  | _        t        j                  | j
                  t        j                        }t        j                  j                  |      | _        t        j                  j                  | j                  |j                   d      | _        t        j                  j                  | j                  |j                   d      | _        t        j                  j                  | j                  |j                   d	      | _        t        j                  j)                  |j*                        | _        || _        y )Nr   zThe hidden size z4 is not a multiple of the number of attention heads rn   dtypequery)r0   r1   keyr   r5   )r6   r7   num_attention_headsr>   r   attention_head_sizeall_head_sizerY   castfloat32mathsqrtscaler   r<   Denseqkv_biasr   r   r   Dropoutattention_probs_dropout_probdropouthidden_size)rG   rH   r   rN   r   r:   s        r!   r7   z!TFMobileViTSelfAttention.__init__  s]   "6"333q8";- 0334A7 
 $*#=#= #&{V5O5O'O#P !558P8PP00

CWW\\%(
\\''(:(:V__[b'c
<<%%d&8&86??Y^%_\\''(:(:V__[b'c
||++F,O,OP&r#   c                    t        j                  |      d   }t        j                  ||d| j                  | j                  f      }t        j
                  |g d      S )Nr   shaper   r   r   r   perm)rY   r   reshaper   r   	transpose)rG   x
batch_sizes      r!   transpose_for_scoresz-TFMobileViTSelfAttention.transpose_for_scores  sI    XXa[^
JJqR1I1I4KcKc de||AL11r#   c                *   t        j                  |      d   }| j                  | j                  |            }| j                  | j	                  |            }| j                  | j                  |            }t        j                  ||d      }|| j                  z  }t        |d      }| j                  ||      }t        j                  ||      }	t        j                  |	g d      }	t        j                  |	|d| j                  f	      }	|	S )
Nr   T)transpose_br   axisrQ   r   r   r   )rY   r   r   r   r   r   matmulr   r   r   r   r   r   )
rG   hidden_statesrR   r   	key_layervalue_layerquery_layerattention_scoresattention_probscontext_layers
             r!   rU   zTFMobileViTSelfAttention.call  s    XXm,Q/
--dhh}.EF	//

=0IJ//

=0IJ 99[)N+djj8 ))9C ,,,J		/;?]F

=RI[I[8\]r#   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r   r   )
rW   rX   rY   rZ   r   r1   r[   r   r   r   r]   s     r!   r[   zTFMobileViTSelfAttention.build5  s)   ::
4$'3tzz/ A

  $d.>.>!?@A4%1txx}}- ?dD,<,<=>?4$'3tzz/ A

  $d.>.>!?@A A 4A A? ?A As$   )E2)E)E$EE!$E-rH   r   r   r   r`   ra   )r   rd   r`   rd   rb   r   rd   rR   r_   r`   rd   re   )r;   rg   rh   r7   r   rU   r[   ri   rj   s   @r!   r   r     s    ',2
0Ar#   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTSelfOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                        | _        || _	        y Ndenser1   r5   )
r6   r7   r   r<   r   r   r   hidden_dropout_probr   r   rG   rH   r   rN   r:   s       r!   r7   zTFMobileViTSelfOutput.__init__E  sR    "6"\\''''B
||++F,F,FG&r#   c                N    | j                  |      }| j                  ||      }|S rP   r   r   )rG   r   rR   s      r!   rU   zTFMobileViTSelfOutput.callK  s(    

=1]XFr#   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wNTr   rW   rX   rY   rZ   r   r1   r[   r   r]   s     r!   r[   zTFMobileViTSelfOutput.buildP  y    ::
4$'3tzz/ A

  $d.>.>!?@A A 4A A   )A>>Br   rb   r   re   rf   rj   s   @r!   r   r   D  s    '
Ar#   r   c                  8     e Zd Zd fdZd ZdddZddZ xZS )	TFMobileViTAttentionc                p    t        |   di | t        ||d      | _        t	        ||d      | _        y )N	attentionr   outputr5   )r6   r7   r   r   r   dense_outputr   s       r!   r7   zTFMobileViTAttention.__init__Z  s4    "6"1&+KX1&+HUr#   c                    t         re   NotImplementedError)rG   headss     r!   prune_headsz TFMobileViTAttention.prune_heads_  s    !!r#   c                R    | j                  ||      }| j                  ||      }|S rP   )r   r   )rG   r   rR   self_outputsattention_outputs        r!   rU   zTFMobileViTAttention.callb  s0    ~~mh~G,,\H,Mr#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr   r   )rW   rX   rY   rZ   r   r1   r[   r   r]   s     r!   r[   zTFMobileViTAttention.buildg  s    ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-. . ;+ +. .   C%CCC r   rb   r   re   )r;   rg   rh   r7   r   rU   r[   ri   rj   s   @r!   r   r   Y  s    V
" 
	.r#   r   c                  0     e Zd Zd fdZddZddZ xZS )TFMobileViTIntermediatec                   t        |   di | t        j                  j	                  |d      | _        t        |j                  t              r"t        |j                        | _
        || _        y |j                  | _
        || _        y r   )r6   r7   r   r<   r   r   rA   rD   rB   r   intermediate_act_fnr   rG   rH   r   intermediate_sizerN   r:   s        r!   r7   z TFMobileViTIntermediate.__init__t  st    "6"\\''(9'H
f''-'89J9J'KD$ ' (.'8'8D$&r#   c                J    | j                  |      }| j                  |      }|S re   )r   r   )rG   r   s     r!   rU   zTFMobileViTIntermediate.call}  s&    

=100?r#   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wr   r   r]   s     r!   r[   zTFMobileViTIntermediate.build  r   r   rH   r   r   r   r   r   r`   ra   )r   rd   r`   rd   re   rf   rj   s   @r!   r   r   s  s    '
Ar#   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                        | _        || _	        y r   )
r6   r7   r   r<   r   r   r   r   r   r   r   s        r!   r7   zTFMobileViTOutput.__init__  sR    "6"\\''''B
||++F,F,FG!2r#   c                X    | j                  |      }| j                  ||      }||z   }|S rP   r   )rG   r   input_tensorrR   s       r!   rU   zTFMobileViTOutput.call  s2    

=1]XF%4r#   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wr   )rW   rX   rY   rZ   r   r1   r[   r   r]   s     r!   r[   zTFMobileViTOutput.build  sy    ::
4$'3tzz/ G

  $d.D.D!EFG G 4G Gr   r   rb   )r   rd   r   rd   rR   r_   r`   rd   re   rf   rj   s   @r!   r   r     s    3Gr#   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTTransformerLayerc                h   t        |   di | t        ||d      | _        t	        |||d      | _        t        |||d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                  d      | _        || _        y )	Nr   r   intermediater   layernorm_beforer3   r1   layernorm_afterr5   )r6   r7   r   r   r   r   r   mobilevit_outputr   r<   LayerNormalizationlayer_norm_epsr   r   r   r   s        r!   r7   z$TFMobileViTTransformerLayer.__init__  s    "6"-fkT3FKIZaop 1&+GX_g h % ? ?H]H]dv ? w$||>>vG\G\ct>u&r#   c                    | j                  | j                  |      |      }||z   }| j                  |      }| j                  |      }| j	                  |||      }|S rP   )r   r   r   r   r   )rG   r   rR   r   layer_outputs        r!   rU   z TFMobileViTTransformerLayer.call  si    >>$*?*?*NYa>b(=8++M:((6,,\=S[,\r#   c                b   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   r   r   )rW   rX   rY   rZ   r   r1   r[   r   r   r   r   r   r]   s     r!   r[   z!TFMobileViTTransformerLayer.build  s   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4+T2>t4499: 2%%++D124+T2>t4499: L%%++T49I9I,JKL4*D1=t33889 K$$**D$8H8H+IJK K >+ +. .2 2L LK Ks<   G3%H ?H)H )H%3G= H
HH"%H.r   rb   r   re   rf   rj   s   @r!   r   r     s    'Kr#   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTTransformerc           	         t        |   di | g | _        t        |      D ]E  }t	        ||t        ||j                  z        d|       }| j                  j                  |       G y )Nr   )r   r   r1   r5   )r6   r7   r<   r   r   r   	mlp_ratior   )rG   rH   r   r   rN   r   transformer_layerr:   s          r!   r7   zTFMobileViTTransformer.__init__  sp    "6"z" 	2A ;'"%kF4D4D&D"EaS\	! KK01	2r#   c                <    | j                   D ]  } |||      } |S rP   r   )rG   r   rR   r   s       r!   rU   zTFMobileViTTransformer.call  s)     KK 	KL(JM	Kr#   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wr   r   r   s      r!   r[   zTFMobileViTTransformer.build  r   r   )rH   r   r   r   r   r   r`   ra   rb   r   re   rf   rj   s   @r!   r   r     s    2
-r#   r   c                  h     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd	dZd
dZdddZddZ xZ	S )TFMobileViTLayerzC
    MobileViT block: https://huggingface.co/papers/2110.02178
    c           	     D   t        	|   di | |j                  | _        |j                  | _        |dk(  r*t        ||||dk(  r|nd|dkD  r|dz  ndd      | _        |}nd | _        t        ||||j                  d      | _	        t        |||dddd	      | _
        t        |||d
      | _        t        j                  j                  |j                   d      | _        t        |||dd      | _        t        |d|z  ||j                  d      | _        || _        y )Nr   r   downsampling_layer)rE   rF   rI   rK   r1   conv_kxkrp   Fconv_1x1)rE   rF   r+   rL   rM   r1   transformer)r   r   r1   	layernormr   conv_projectionfusionr5   )r6   r7   
patch_sizepatch_widthpatch_heightrl   r  r%   conv_kernel_sizer  r  r   r  r   r<   r   r   r	  r
  r  r   )
rG   rH   rE   rF   rI   r   r   rK   rN   r:   s
            r!   r7   zTFMobileViTLayer.__init__  sE    	"6"!,,"--Q;&A')!)QvA*2Q,QA)'D# 'K&*D#,#$//
 -#$# 
 2

 88AVAV]h8i3+ST[l 
 +K$//
 'r#   c                   | j                   | j                  }}t        j                  ||z  d      }t        j                  |      d   }t        j                  |      d   }t        j                  |      d   }t        j                  |      d   }t        j                  t        j
                  j                  ||z        |z  d      }	t        j                  t        j
                  j                  ||z        |z  d      }
|
|k7  xs |	|k7  }|r$t        j                  j                  ||	|
fd      }|
|z  }|	|z  }||z  }t        j                  |g d      }t        j                  |||z  |z  |||f      }t        j                  |g d	      }t        j                  |||||f      }t        j                  |g d
      }t        j                  |||z  ||f      }||f||||||d}||fS )Nint32r   r   r   r   bilinearsizemethodr   r   r   r   r   r   r   r   r   )	orig_sizer   channelsinterpolatenum_patchesnum_patches_widthnum_patches_height)r  r  rY   r   r   r   ceilimageresizer   r   )rG   rS   r  r  
patch_arear   orig_height
orig_widthr  
new_height	new_widthr  num_patch_widthnum_patch_heightr  patches	info_dicts                    r!   	unfoldingzTFMobileViTLayer.unfolding,  s   $($4$4d6G6G\WW[<7A
XXh'*
hhx(+XXh'*
88H%a(WWRWW\\+*DETV]^
GGBGGLLk)AB[PRYZ	:-J{1Jxxxz96MV`aH ${2%5&8 <<,7**zH,/??`kl
 ,,w5**Wz8[*&UV,,w5**WzJ'>X&VW &z2$ &&!0"2
	 	!!r#   c                   | j                   | j                  }}t        ||z        }|d   }|d   }|d   }|d   }	|d   }
t        j                  ||||df      }t        j
                  |d      }t        j                  |||z  |	z  |
||f      }t        j
                  |d	      }t        j                  ||||	|z  |
|z  f      }t        j
                  |d
      }|d   r%t        j                  j                  ||d   d      }|S )Nr   r  r  r  r  r   r  r   r   r   r   r   r   r  r  r  r  )r  r  r   rY   r   r   r  r   )rG   r(  r)  r  r  r!  r   r  r  r'  r&  rS   s               r!   foldingzTFMobileViTLayer.foldingX  s&   $($4$4d6G6G\|34
|,
Z(.$%9:#$78 ::g
JR'PQ<<|<::zH,/??R^`kl
 <<|<::z8-=-Lo`kNkl
 <<|<]#xxxi6LU_`Hr#   c                   | j                   r| j                  ||      }|}| j                  ||      }| j                  ||      }| j                  |      \  }}| j	                  ||      }| j                  |      }| j                  ||      }| j                  ||      }| j                  t        j                  ||gd      |      }|S )NrQ   r   r   )r  r  r  r*  r  r	  r-  r
  r  rY   concat)rG   rS   rR   ry   r(  r)  s         r!   rU   zTFMobileViTLayer.callt  s    ""..x(.KH ==H====H== "^^H5 ""7X">..) <<3''8'D;;ryy(H)=BGRZ;[r#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   5xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)	NTr  r  r  r	  r
  r  r  )rW   rX   rY   rZ   r  r1   r[   r  r  r	  r   r
  r  r  r]   s     r!   r[   zTFMobileViTLayer.build  sQ   ::
4T*6t}}112 *##D)*4T*6t}}112 *##D)*4-9t//445 -  &&t,-4d+7t~~223 E$$dD$2B2B%CDE4*D1=t33889 1$$**40144(4t{{//0 (!!$'(4-t4@t66;;< 4''--d34 4 A#* ** *- -E E1 1( (4 4sT   J%J'?J4)K KK4K&J$'J14J>KKK#&K/r{   )rH   r   rE   r   rF   r   rI   r   r   r   r   r   rK   r   r`   ra   )rS   rd   r`   ztuple[tf.Tensor, dict])r(  rd   r)  dictr`   rd   rb   rc   re   )
r;   rg   rh   r}   r7   r*  r-  rU   r[   ri   rj   s   @r!   r  r    sv     ?'?' ?' 	?'
 ?' ?' ?' ?' 
?'B*"X824r#   r  c                  J     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFMobileViTEncoderc           
        t        |   di | || _        g | _        dx}}|j                  dk(  rd}d}n|j                  dk(  rd}d}t        ||j                  d   |j                  d   ddd      }| j                  j                  |       t        ||j                  d   |j                  d	   d	d
d      }| j                  j                  |       t        ||j                  d	   |j                  d
   d	|j                  d   d	d      }| j                  j                  |       |r|d	z  }t        ||j                  d
   |j                  d   d	|j                  d   d|d      }	| j                  j                  |	       |r|d	z  }t        ||j                  d   |j                  d   d	|j                  d	   d
|d      }
| j                  j                  |
       y )NFr   T   r   r   zlayer.0)rE   rF   rI   r   r1   r   r   zlayer.1zlayer.2)rE   rF   rI   r   r   r1      zlayer.3)rE   rF   rI   r   r   rK   r1      zlayer.4r5   )
r6   r7   rH   r<   output_strider   neck_hidden_sizesr   r  hidden_sizes)rG   rH   rN   dilate_layer_4dilate_layer_5rK   layer_1layer_2layer_3layer_4layer_5r:   s              r!   r7   zTFMobileViTEncoder.__init__  s   "6" +0/1$!N!N!!R'!N+00311!4
 	7#+00311!4
 	7#"00311!4++A.
 	7#MH"00311!4++A.	
 	7#MH"00311!4++A.	
 	7#r#   c                    |rdnd }t        | j                        D ]  \  }} |||      }|s||fz   } |st        d ||fD              S t        ||      S )Nr5   rQ   c              3  &   K   | ]	  }||  y wre   r5   ).0vs     r!   	<genexpr>z*TFMobileViTEncoder.call.<locals>.<genexpr>  s     Xq!-Xs   )last_hidden_stater   )	enumerater<   tupler
   )rG   r   output_hidden_statesreturn_dictrR   all_hidden_statesr   r   s           r!   rU   zTFMobileViTEncoder.call  ss     #7BD(5 	IOA|(JM#$58H$H!		I X]4E$FXXX =Pabbr#   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wr   r   r   s      r!   r[   zTFMobileViTEncoder.build  r   r   rH   r   r`   ra   )FTF)
r   rd   rJ  r_   rK  r_   rR   r_   r`   ztuple | TFBaseModelOutputre   rf   rj   s   @r!   r3  r3    sU    L$b &+ c c #c 	c
 c 
#c(-r#   r3  c                  b     e Zd ZeZdd fdZd Ze	 	 	 	 d	 	 	 	 	 	 	 	 	 dd       Zd	dZ	 xZ
S )
TFMobileViTMainLayerc                   t        |   di | || _        || _        t	        ||j
                  |j                  d   ddd      | _        t        |d      | _	        | j                  r/t	        ||j                  d   |j                  d	   d
d      | _
        t        j                  j                  dd      | _        y )Nr   r   r   	conv_stem)rE   rF   r+   rI   r1   encoderr   r7     r   conv_1x1_exprp   channels_firstpooler)data_formatr1   r5   )r6   r7   rH   expand_outputr%   num_channelsr9  rR  r3  rS  rU  r   r<   GlobalAveragePooling2DrW  )rG   rH   rY  rN   r:   s       r!   r7   zTFMobileViTMainLayer.__init__  s    "6"*-++11!4
 *&yA 4"44Q7#55a8#!D ll99FV]e9fr#   c                    t         )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r   )rG   heads_to_prunes     r!   _prune_headsz!TFMobileViTMainLayer._prune_heads4  s
    
 "!r#   c           	     4   ||n| j                   j                  }||n| j                   j                  }t        j                  |d      }| j                  ||      }| j                  ||||      }| j                  r?| j                  |d         }t        j                  |g d      }| j                  |      }n |d   }t        j                  |g d      }d }|s[|||fn|f}	| j                  s>|dd  }
t        |
d   D cg c]  }t        j                  |d       c}      }
|
f}
|	|
z   S |	|dd  z   S |r1t        |d   D cg c]  }t        j                  |d       c}      }t        |||r      S |j                        S c c}w c c}w )	Nr,  r   rQ   rJ  rK  rR   r   r  r   )rG  pooler_outputr   )rH   rJ  use_return_dictrY   r   rR  rS  rY  rU  rW  rI  r   r   )rG   pixel_valuesrJ  rK  rR   embedding_outputencoder_outputsrG  pooled_outputr   remaining_encoder_outputshr   s                r!   rU   zTFMobileViTMainLayer.call;  s    %9$D $++JjJj 	 &1%<k$++B]B]
 ||L|D>>,>J,,3GU`ks ' 
  $ 1 1/!2D E !#->\ R !KK(9:M / 2 "->\ R M;H;T'7[lZnF %%,;AB,?),1AZ[\A]^AR\\!,7^-) .G,H) 999 333  !_`Oa"b!2<<#E"bcM+/'+?-
 	
 FUEbEb
 	
 _ #cs   F
Fc                d   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Ot        j                  | j                  j
                        5  | j                  j                  g d       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTrR  rS  rW  NNNNrU  )
rW   rX   rY   rZ   rR  r1   r[   rS  rW  rU  r]   s     r!   r[   zTFMobileViTMainLayer.buildy  sR   ::
4d+7t~~223 +$$T*+4D)5t||001 )""4()44(4t{{//0 <!!":;<4.:t00556 .!!''-. . ;+ +) )< <. .s0   F%F?FF&FFF#&F/TrH   r   rY  r_   NNNF
rc  tf.Tensor | NonerJ  bool | NonerK  rp  rR   r_   r`   z/tuple[tf.Tensor] | TFBaseModelOutputWithPoolingre   )r;   rg   rh   r   config_classr7   r^  r   rU   r[   ri   rj   s   @r!   rP  rP    sk    "Lg6"  *.,0#';
&;
 *;
 !	;

 ;
 
9;
 ;
z.r#   rP  c                      e Zd ZdZeZdZdZy)TFMobileViTPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    	mobilevitrc  N)r;   rg   rh   r}   r   rq  base_model_prefixmain_input_namer5   r#   r!   rs  rs    s    
 #L#$Or#   rs  a	  
    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `pixel_values` only and nothing else: `model(pixel_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([pixel_values, attention_mask])` or `model([pixel_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"pixel_values": pixel_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileViTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`np.ndarray`, `tf.Tensor`, `list[tf.Tensor]`, `dict[str, tf.Tensor]` or `dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`MobileViTImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
zWThe bare MobileViT model outputting raw hidden-states without any specific head on top.c            	           e Zd Zdd fdZe ee       eee	e
de      	 	 	 	 d	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFMobileViTModelc                n    t        |   |g|i | || _        || _        t	        ||d      | _        y )Nrt  rY  r1   )r6   r7   rH   rY  rP  rt  )rG   rH   rY  inputsrN   r:   s        r!   r7   zTFMobileViTModel.__init__  s:    3&3F3*-fMXcdr#   vision)
checkpointoutput_typerq  modalityexpected_outputc                0    | j                  ||||      }|S rP   )rt  )rG   rc  rJ  rK  rR   r   s         r!   rU   zTFMobileViTModel.call  s!      .BKZbcr#   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTrt  )rW   rX   rY   rZ   rt  r1   r[   r]   s     r!   r[   zTFMobileViTModel.build  si    ::
4d+7t~~223 +$$T*+ + 8+ +s   A11A:rk  rl  rm  rn  re   )r;   rg   rh   r7   r   r   MOBILEVIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPErU   r[   ri   rj   s   @r!   rx  rx    s    
e *+EF&0$. *.,0#'& * !	
  
9 G +r#   rx  z
    MobileViT model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                       e Zd Zd fdZe ee       eee	e
e      	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	!TFMobileViTForImageClassificationc                z   t        |   |g|i | |j                  | _        t        |d      | _        t
        j                  j                  |j                        | _	        |j                  dkD  r+t
        j                  j                  |j                  d      nt        j                  | _        || _        y )Nrt  r   r   
classifier)r6   r7   
num_labelsrP  rt  r   r<   r   classifier_dropout_probr   r   rY   identityr  rH   )rG   rH   r{  rN   r:   s       r!   r7   z*TFMobileViTForImageClassification.__init__  s    3&3F3 ++-f;G ||++F,J,JKHNHYHY\]H]ELLv00|Dcecncn 	 r#   )r}  r~  rq  r  c                R   ||n| j                   j                  }| j                  ||||      }|r|j                  n|d   }| j	                  | j                  ||            }|dn| j                  ||      }	|s|f|dd z   }
|	|	f|
z   S |
S t        |	||j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr`  r   rQ   )labelslogitsr   lossr  r   )	rH   rb  rt  ra  r  r   hf_compute_lossr   r   )rG   rc  rJ  r  rK  rR   outputsrf  r  r  r   s              r!   rU   z&TFMobileViTForImageClassification.call  s    , &1%<k$++B]B]../CQ\go ! 
 2=--'!*mh!OP~t4+?+?vV\+?+]Y,F)-)9TGf$EvE54^e^s^sttr#   c                (   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       t        | j                  d      rht        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  d   g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTrt  r  r1   r   )rW   rX   rY   rZ   rt  r1   r[   r\   r  rH   r9  r]   s     r!   r[   z'TFMobileViTForImageClassification.build3  s    ::
4d+7t~~223 +$$T*+4t,8t/]]4??#7#78 [OO))4t{{7T7TUW7X*YZ[ [ 0 9+ +[ [s   C<;6D<DDrN  NNNNF)rc  ro  rJ  rp  r  ro  rK  rp  rR   rp  r`   z.tuple | TFImageClassifierOutputWithNoAttentionre   )r;   rg   rh   r7   r   r   r  r   _IMAGE_CLASS_CHECKPOINTr   r  _IMAGE_CLASS_EXPECTED_OUTPUTrU   r[   ri   rj   s   @r!   r  r    s     *+EF*:$4	 *.,0#'#' %u&u *u !	u
 !u u 
8u G u>
[r#   r  c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTASPPPoolingc           
         t        |   di | t        j                  j	                  dd      | _        t        |||ddddd      | _        y )	NTglobal_pool)keepdimsr1   r   relur  )rE   rF   r+   rI   rL   rM   r1   r5   )r6   r7   r   r<   r[  r  r%   r  )rG   rH   rE   rF   rN   r:   s        r!   r7   zTFMobileViTASPPPooling.__init__A  sT    "6" <<>>S`>a,#%"!	
r#   c                    t        |      dd }| j                  |      }| j                  ||      }t        j                  j                  ||d      }|S )Nr   r   rQ   r  r  )r   r  r  rY   r  r   )rG   rS   rR   spatial_sizes       r!   rU   zTFMobileViTASPPPooling.callQ  sR    !(+Ab1##H-==H==88??8,z?Rr#   c                   | j                   ry d| _         t        | dd       Ot        j                  | j                  j
                        5  | j                  j                  g d       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  rj  r  )rW   rX   rY   rZ   r  r1   r[   r  r]   s     r!   r[   zTFMobileViTASPPPooling.buildX  s    ::
4-9t//445 A  &&'?@A4T*6t}}112 *##D)* * 7A A* *s   C'CCC")rH   r   rE   r   rF   r   r`   ra   rb   rc   re   rf   rj   s   @r!   r  r  @  s    
 	*r#   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )	TFMobileViTASPPz
    ASPP module defined in DeepLab papers: https://huggingface.co/papers/1606.00915, https://huggingface.co/papers/1706.05587
    c                   t        	|   di | |j                  d   }|j                  }t	        |j
                        dk7  rt        d      g | _        t        |||ddd      }| j                  j                  |       | j                  j                  t        |j
                        D cg c]  \  }}t        |||d|dd|dz    	       c}}       t        |||dt	        |j
                        dz    
      }| j                  j                  |       t        |d|z  |ddd      | _        t        j                  j!                  |j"                        | _        y c c}}w )Nr   z"Expected 3 values for atrous_ratesr   r  zconvs.0rs   zconvs.)rE   rF   r+   rK   rM   r1   r   r7  projectr5   )r6   r7   r9  aspp_out_channelslenatrous_ratesr>   convsr%   r   extendrH  r  r  r   r<   r   aspp_dropout_probr   )
rG   rH   rN   rE   rF   in_projectionr   rate
pool_layerr:   s
            r!   r7   zTFMobileViTASPP.__init__i  sl   "6"..r2//v""#q(ABB
,#%!
 	

-(

  ))<)<= At % +!- !!#)!!a%)	
 ,KfSATAT=UXY=Y<Z4[

 	

*%+L(%!
 ||++F,D,DE9s   /"E$
c                
   t        j                  |g d      }g }| j                  D ]  }|j                   |||              t        j                  |d      }| j                  ||      }| j                  ||      }|S )Nr,  r   rQ   r   r   )rY   r   r  r   r/  r  r   )rG   rS   rR   pyramidconvpooled_featuress         r!   rU   zTFMobileViTASPP.call  sy     <<|<JJ 	>DNN48<=	>))G"-,,w,B,,,Jr#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   bxY w# 1 sw Y   UxY w)NTr  r  )rW   rX   rY   rZ   r  r1   r[   r  )rG   r^   r  s      r!   r[   zTFMobileViTASPP.build  s    ::
4D)5t||001 )""4()4$'3

 %]]499- %JJt$% %% 4) )% %s   C*CCC	rN  rb   rc   re   r|   rj   s   @r!   r  r  d  s    2Fh
%r#   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )	TFMobileViTDeepLabV3zJ
    DeepLabv3 architecture: https://huggingface.co/papers/1706.05587
    c           
         t        |   di | t        |d      | _        t        j
                  j                  |j                        | _        t        ||j                  |j                  ddddd      | _        y )	Nasppr   r   FTr  )rE   rF   r+   rL   rM   rJ   r1   r5   )r6   r7   r  r  r   r<   r   r  r   r%   r  r  r  rG   rH   rN   r:   s      r!   r7   zTFMobileViTDeepLabV3.__init__  sm    "6"#F8	||++F,J,JK.00**# 	
r#   c                ~    | j                  |d   |      }| j                  ||      }| j                  ||      }|S )Nr   rQ   )r  r   r  )rG   r   rR   rS   s       r!   rU   zTFMobileViTDeepLabV3.call  sB    99]2.9B<<8<<??8h??r#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )rW   rX   rY   rZ   r  r1   r[   r  r]   s     r!   r[   zTFMobileViTDeepLabV3.build  s    ::
4&2tyy~~. &		%&4t,8t334 ,%%d+, , 9& &, ,r   rN  rb   r   re   r|   rj   s   @r!   r  r    s    
"	,r#   r  zX
    MobileViT model with a semantic segmentation head on top, e.g. for Pascal VOC.
    c                       e Zd Zd fdZd Ze ee       ee	e
      	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
"TFMobileViTForSemanticSegmentationc                    t        |   |fi | |j                  | _        t        |dd      | _        t        |d      | _        y )NFrt  rz  segmentation_headr   )r6   r7   r  rP  rt  r  r  r  s      r!   r7   z+TFMobileViTForSemanticSegmentation.__init__  sC    *6* ++-fEP[\!5fCV!Wr#   c                     t        |      dd  }t        j                  j                  ||d      }t        j
                  j                  dd       fd} |||      S )Nr   r  r  Tnone)from_logits	reductionc                    | |      }t        j                  | j                  j                  k7  |j                        }||z  }t        j
                  |      t        j
                  |      z  }t        j                  |d      S )Nr   r{   )rY   r   rH   semantic_loss_ignore_indexr   
reduce_sumr   )realpredunmasked_lossmaskmasked_lossreduced_masked_lossloss_fctrG   s         r!   r  zGTFMobileViTForSemanticSegmentation.hf_compute_loss.<locals>.masked_loss  sp    $T40M7744;;#I#IIQ^QdQdeD'$.K #%--"<r}}T?R"R::1488r#   )r   rY   r  r   r   lossesSparseCategoricalCrossentropy)rG   r  r  label_interp_shapeupsampled_logitsr  r  s   `     @r!   r  z2TFMobileViTForSemanticSegmentation.hf_compute_loss  sa     (/388??68JS]?^<<==$Z`=a	9 6#344r#   )r~  rq  c                   ||n| j                   j                  }||n| j                   j                  }|$| j                   j                  dkD  st	        d      | j                  |d||      }|r|j                  n|d   }| j                  ||      }d}	|| j                  ||      }	t        j                  |g d	      }|s|r
|f|dd z   }
n	|f|d
d z   }
|	|	f|
z   S |
S t        |	||r|j                        S d      S )aK  
        labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, TFMobileViTForSemanticSegmentation
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
        >>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")

        >>> inputs = image_processor(images=image, return_tensors="tf")

        >>> outputs = model(**inputs)

        >>> # logits are of shape (batch_size, num_labels, height, width)
        >>> logits = outputs.logits
        ```Nr   z/The number of labels should be greater than oneTr`  rQ   )r  r  r  r   r   r  )rH   rJ  rb  r  r>   rt  r   r  r  rY   r   r   )rG   rc  r  rJ  rK  rR   r  encoder_hidden_statesr  r  r   s              r!   rU   z'TFMobileViTForSemanticSegmentation.call  sI   N %9$D $++JjJj 	 &1%<k$++B]B]dkk&<&<q&@NOO..!%#	 ! 
 :E 5 5'RS*''(='Q''vf'ED f<8# WQR[0 WQR[0)-)9TGf$EvE73G'//
 	
 NR
 	
r#   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTrt  r  )rW   rX   rY   rZ   rt  r1   r[   r  r]   s     r!   r[   z(TFMobileViTForSemanticSegmentation.buildO  s    ::
4d+7t~~223 +$$T*+4,d3?t55::; 3&&,,T23 3 @+ +3 3r   rN  r  )rc  ro  r  ro  rJ  rp  rK  rp  rR   r_   r`   z0tuple | TFSemanticSegmenterOutputWithNoAttentionre   )r;   rg   rh   r7   r  r   r   r  r	   r   r  rU   r[   ri   rj   s   @r!   r  r    s    X5( *+EF+Sbqr *.#',0#'I
&I
 !I
 *	I

 !I
 I
 
:I
 s G I
V	3r#   r  )r  r  rx  rs  )r   N)r   r   r   r   r   z
int | Noner`   r   )@r}   
__future__r   
tensorflowrY   activations_tfr   
file_utilsr   r   r   r	   modeling_tf_outputsr
   r   r   r   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   configuration_mobilevitr   
get_loggerr;   r8   r  r  r  r  r  r"   r<   Layerr%   rl   r   r   r   r   r   r   r   r   r  r3  rP  rs  MOBILEVIT_START_DOCSTRINGr  rx  r  r  r  r  r  __all__r5   r#   r!   <module>r     s  " & "  /    3  4 
		H	% $ . '  2 1 JT5<<-- JTZ=,%,,"4"4 =,@$- 2 2 $-N@Au||11 @AFAELL.. A*.5<<-- .4Aell00 A0G** G,%K%,,"4"4 %KP-U\\// -:4u||)) 4Dj-++ j-Z r.5<<-- r. r.j%!2 %' R   ]!+1 !+	!+H  ?[(BD` ?[?[D!*U\\// !*HP%ell(( P%f%,5<<-- %,P  	s3)C s3s3lr#   