
    rhG                   >   d Z ddlmZ ddlZddlmZ ddlZddlZ	ddl
mZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZm Z m!Z! dd	l"m#Z#m$Z$m%Z% dd
l&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.  e+j^                  e0      Z1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9dZ:dZ;dZ<dZ=dZ> G d d      Z? G d dej                  j                        ZB G d dej                  j                        ZD G d  d!ej                  j                        ZEeDeEd"ZF G d# d$ej                  j                        ZG G d% d&ej                  j                        ZH G d' d(ej                  j                        ZI G d) d*ej                  j                        ZJ G d+ d,ej                  j                        ZK G d- d.ej                  j                        ZL G d/ d0ej                  j                        ZM G d1 d2ej                  j                        ZN G d3 d4ej                  j                        ZO G d5 d6ej                  j                        ZP G d7 d8ej                  j                        ZQ G d9 d:ej                  j                        ZR G d; d<ej                  j                        ZS G d= d>ej                  j                        ZT G d? d@ej                  j                        ZU G dA dBej                  j                        ZVe  G dC dDej                  j                               ZW G dE dFe      ZXe G dG dHe'             ZYdIZZdJZ[ e)dKeZ       G dL dMeX             Z\ e)dNeZ       G dO dPeXe?             Z] e)dQeZ       G dR dSeXe             Z^ G dT dUej                  j                        Z_ e)dVeZ       G dW dXeXe             Z` e)dYeZ       G dZ d[eXe             Za e)d\eZ       G d] d^eXe             Zb e)d_eZ       G d` daeXe             Zc e)dbeZ       G dc ddeXe             Zdg deZey)fzTF 2.0 MobileBERT model.    )annotationsN)	dataclass   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFMaskedLMOutputTFMultipleChoiceModelOutputTFNextSentencePredictorOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFMaskedLanguageModelingLossTFModelInputTypeTFMultipleChoiceLossTFNextSentencePredictionLossTFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )MobileBertConfigzgoogle/mobilebert-uncasedr%   z"vumichien/mobilebert-finetuned-nerzK['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']gQ?z%vumichien/mobilebert-uncased-squad-v2z'a nice puppet'gףp=
@      zvumichien/emo-mobilebertz'others'z4.72c                      e Zd ZdZddZy)TFMobileBertPreTrainingLossz
    Loss function suitable for BERT-like pretraining, that is, the task of pretraining a language model by combining
    NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
    computation.
    c                   t         j                  j                  dt         j                  j                  j                        } |t
        j                  j                  |d         |d         }t        j                  |d   dk7  |j                        }||z  }t        j                  |      t        j                  |      z  } |t
        j                  j                  |d         |d	         }t        j                  |d   dk7  |j                        }	||	z  }
t        j                  |
      t        j                  |	      z  }t        j                  ||z   d
      S )NT)from_logits	reductionlabelsr   )y_truey_predidtypenext_sentence_labelr$   )r$   )r   lossesSparseCategoricalCrossentropy	ReductionNONEtfnnrelucastr1   
reduce_sumreshape)selfr-   logitsloss_fnunmasked_lm_losseslm_loss_maskmasked_lm_lossesreduced_masked_lm_lossunmasked_ns_lossns_loss_maskmasked_ns_lossreduced_masked_ns_losss               /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/mobilebert/modeling_tf_mobilebert.pyhf_compute_lossz+TFMobileBertPreTrainingLoss.hf_compute_loss^   s-   ,,<<Y^YeYeYoYoYtYt<u %BEEJJvh7G,HQWXYQZ[ wwvh/47?Q?W?WX-<!#/?!@2==Q]C^!^ #"%%**V<Q5R*S\bcd\efwwv&;<DL\LbLbc)L8!#~!>|A\!\zz03II4PP    N)r-   	tf.Tensorr>   rK   returnrK   )__name__
__module____qualname____doc__rI    rJ   rH   r)   r)   W   s    QrJ   r)   c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertIntermediatec                ,   t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                  t              r"t        |j                        | _        || _        y |j                  | _        || _        y )NdensenamerQ   )super__init__r   layersDenseintermediate_sizerU   
isinstance
hidden_actstrr   intermediate_act_fnconfigr=   ra   kwargs	__class__s      rH   rY   z!TFMobileBertIntermediate.__init__t   sw    "6"\\''(@(@w'O
f''-'89J9J'KD$  (.'8'8D$rJ   c                J    | j                  |      }| j                  |      }|S N)rU   r`   r=   hidden_statess     rH   callzTFMobileBertIntermediate.call   s&    

=100?rJ   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wNTrU   )	builtgetattrr7   
name_scoperU   rW   buildra   true_hidden_sizer=   input_shapes     rH   ro   zTFMobileBertIntermediate.build   s}    ::
4$'3tzz/ M

  $dkk.J.J!KLM M 4M M   3BBrf   rM   rN   rO   rY   ri   ro   __classcell__rd   s   @rH   rS   rS   s   s    	MrJ   rS   c                  *     e Zd Z fdZd fd	Z xZS )TFLayerNormc                2    || _         t        |   |i | y rf   )	feat_sizerX   rY   )r=   rz   argsrc   rd   s       rH   rY   zTFLayerNorm.__init__   s    "$)&)rJ   c                >    t         |   d d | j                  g       y rf   )rX   ro   rz   r=   rr   rd   s     rH   ro   zTFLayerNorm.build   s    tT4>>23rJ   rf   )rM   rN   rO   rY   ro   ru   rv   s   @rH   rx   rx      s    *4 4rJ   rx   c                  2     e Zd Zd fd	Z fdZddZ xZS )TFNoNormc                2    t        |   di | || _        y )NrQ   )rX   rY   rz   )r=   rz   epsilonrc   rd   s       rH   rY   zTFNoNorm.__init__   s    "6""rJ   c                    | j                  d| j                  gd      | _        | j                  d| j                  gd      | _        t        |   |       y )Nbiaszeros)shapeinitializerweightones)
add_weightrz   r   r   rX   ro   r}   s     rH   ro   zTFNoNorm.build   sK    OOF4>>2BPWOX	ooht~~6FTZo[k"rJ   c                :    || j                   z  | j                  z   S rf   )r   r   )r=   inputss     rH   ri   zTFNoNorm.call   s    #dii//rJ   rf   )r   rK   )rM   rN   rO   rY   ro   ri   ru   rv   s   @rH   r   r      s    ##
0rJ   r   )
layer_normno_normc                  2     e Zd ZdZ fdZddZddZ xZS )TFMobileBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                V   t        |   di | |j                  | _        |j                  | _        || _        |j
                  | _        |j                  | _        |j                  | _        t        j                  j                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _        t        j                  j!                  |j"                        | _        | j                  | j                  r
dz  | _        y dz  | _        y )	Nembedding_transformationrV   	LayerNormr   rW   )rater   r$   rQ   )rX   rY   trigram_inputembedding_sizera   hidden_sizemax_position_embeddingsinitializer_ranger   rZ   r[   r   NORM2FNnormalization_typelayer_norm_epsr   Dropouthidden_dropout_probdropoutembedded_input_sizerb   s      rH   rY   zTFMobileBertEmbeddings.__init__   s    "6"#11$33!--'-'E'E$!'!9!9(-(:(:6;M;MTn(:(o% !!:!:;(=(=K
 ||++1K1K+L#'#6#6t?Q?Q!#Y WX#Y rJ   c                   t        j                  d      5  | j                  d| j                  j                  | j
                  gt        | j                              | _        d d d        t        j                  d      5  | j                  d| j                  j                  | j                  gt        | j                              | _        d d d        t        j                  d      5  | j                  d| j                  | j                  gt        | j                              | _        d d d        | j                  ry d| _        t        | d	d       Zt        j                  | j                   j"                        5  | j                   j%                  d d | j&                  g       d d d        t        | d
d       Nt        j                  | j(                  j"                        5  | j(                  j%                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   OxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nword_embeddingsr   )r   )rW   r   r   token_type_embeddings
embeddingsposition_embeddingsTr   r   )r7   rn   r   ra   
vocab_sizer   r   r   r   type_vocab_sizer   r   r   r   rl   rm   r   rW   ro   r   r   rq   s     rH   ro   zTFMobileBertEmbeddings.build   s   ]],- 	//{{--t/B/BC+d>T>TU * DK	 ]]23 	)-!{{22D4D4DE+d>T>TU *9 *D&	 ]]01 	'+!33T5E5EF+d>T>TU (7 (D$	 ::
43T:Ft<<AAB \--33T4AYAY4Z[\4d+7t~~223 +$$T*+ + 87	 		 		 	\ \+ +s>   AHAH,AH))H5)IHH&)H25H>I
c           
     R   ||J |At        || j                  j                         t        j                  | j
                  |      }t        |      dd }|t        j                  |d      }| j                  rTt        j                  t        j                  |ddddf   d      |t        j                  |ddddf   d      gd	
      }| j                  s| j                  | j                  k7  r| j                  |      }|/t        j                  t        j                  d|d         d
      }t        j                  | j                   |      }t        j                  | j"                  |      }||z   |z   }	| j%                  |	      }	| j'                  |	|      }	|	S )z
        Applies embedding based on inputs tensor.

        Returns:
            final_embeddings (`tf.Tensor`): output embedding tensor.
        N)paramsindicesr   )dimsvaluer$   )r   r   )r   r$   r   )r   )r$   r   r      axis)startlimit)r   )r   training)r   ra   r   r7   gatherr   r   fillr   concatpadr   r   r   expand_dimsranger   r   r   r   )
r=   	input_idsposition_idstoken_type_idsinputs_embedsr   rr   position_embedstoken_type_embedsfinal_embeddingss
             rH   ri   zTFMobileBertEmbeddings.call   s    %-*?@@ *9dkk6L6LMIIT[[)LM /4!WW+Q?N IIFF=AB/1IJ!FF=CRC02JK
 M !4!48H8H!H 99-HM>>"((+b/*RYZ[L))4+C+C\ZIIT-G-GQ_`(?:=NN>>1A>B<</?(<SrJ   rf   )NNNNF)rM   rN   rO   rP   rY   ro   ri   ru   rv   s   @rH   r   r      s    QZ&+@/ rJ   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )TFMobileBertSelfAttentionc                   t        |   di | |j                  |j                  z  dk7  r%t	        d|j                   d|j                         |j                  | _        |j
                  | _        |j                  |j                  z  dk(  sJ t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j%                  |j&                        | _        || _        y )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads (querykernel_initializerrW   keyr   rQ   )rX   rY   r   num_attention_heads
ValueErroroutput_attentionsintrp   attention_head_sizeall_head_sizer   rZ   r[   r   r   r   r   r   r   attention_probs_dropout_probr   ra   rb   s      rH   rY   z"TFMobileBertSelfAttention.__init__  s   "6" : ::a?#F$6$6#7 8 4457 
 $*#=#= !'!9!9!!F$>$>>!CCC#&v'>'>A[A['[#\ !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OPrJ   c                    t        j                  ||d| j                  | j                  f      }t        j                  |g d      S )Nr   r   r   r$   r   perm)r7   r<   r   r   	transpose)r=   x
batch_sizes      rH   transpose_for_scoresz.TFMobileBertSelfAttention.transpose_for_scores,  s8    JJq:r4+C+CTE]E]^_||AL11rJ   c                   t        |      d   }| j                  |      }	| j                  |      }
| j                  |      }| j	                  |	|      }| j	                  |
|      }| j	                  ||      }t        j                  ||d      }t        j                  t        |      d   |j                        }|t
        j                  j                  |      z  }|&t        j                  ||j                        }||z   }t        |d      }| j                  ||      }|||z  }t        j                  ||      }t        j                  |g d	      }t        j                  ||d| j                  f      }|r||f}|S |f}|S )
Nr   T)transpose_br   r0   r   r   r   r   )r   r   r   r   r   r7   matmulr:   r1   mathsqrtr   r   r   r<   r   )r=   query_tensor
key_tensorvalue_tensorattention_mask	head_maskr   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probscontext_layeroutputss                       rH   ri   zTFMobileBertSelfAttention.call1  s     /2
 JJ|4((:. JJ|4//0A:N--ozJ	//0A:N 99
 WWZ	*2.6F6L6LM+bggll2.>>%WW^;K;Q;QRN/.@ ))9C ,,,J  -	9O		/;?]F

JD,>,>?
 7H=/2 O\M]rJ   c                v   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       t        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  r| j                  j                  n| j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   )rl   rm   r7   rn   r   rW   ro   ra   rp   r   r   use_bottleneck_attentionr   rq   s     rH   ro   zTFMobileBertSelfAttention.build^  sW   ::
4$'3tzz/ M

  $dkk.J.J!KLM4%1txx}}- KdDKK,H,HIJK4$'3tzz/ 	

  ;;?? 44![[44	 	 4M MK K	 	s%   3F<3F#-AF/F #F,/F8Frf   )rM   rN   rO   rY   r   ri   ro   ru   rv   s   @rH   r   r     s    62 ns+ZrJ   r   c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertSelfOutputc                   t        |   di | |j                  | _        t        j                  j                  |j                  t        |j                        d      | _	        t        |j                     |j                  |j                  d      | _        | j                  s.t        j                  j                  |j                        | _        || _        y )NrU   r   r   r   rQ   )rX   rY   use_bottleneckr   rZ   r[   rp   r   r   rU   r   r   r   r   r   r   r   ra   rb   s      rH   rY   zTFMobileBertSelfOutput.__init__v  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDLrJ   c                    | j                  |      }| j                  s| j                  ||      }| j                  ||z         }|S Nr   )rU   r   r   r   )r=   rh   residual_tensorr   s       rH   ri   zTFMobileBertSelfOutput.call  sD    

=1"" LLLJM}'FGrJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wNTrU   r   
rl   rm   r7   rn   rU   rW   ro   ra   rp   r   rq   s     rH   ro   zTFMobileBertSelfOutput.build      ::
4$'3tzz/ M

  $dkk.J.J!KLM4d+7t~~223 +$$T*+ + 8M M+ +   3C"<C."C+.C7r   rf   rt   rv   s   @rH   r   r   u  s    	+rJ   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )TFMobileBertAttentionc                l    t        |   di | t        |d      | _        t	        |d      | _        y )Nr=   rV   outputrQ   )rX   rY   r   r=   r   mobilebert_outputrb   s      rH   rY   zTFMobileBertAttention.__init__  s0    "6"-f6B	!7X!NrJ   c                    t         rf   NotImplementedError)r=   headss     rH   prune_headsz!TFMobileBertAttention.prune_heads  s    !!rJ   c	           	     v    | j                  |||||||      }	| j                  |	d   ||      }
|
f|	dd  z   }|S )Nr   r   r$   )r=   r  )r=   r   r   r   layer_inputr   r   r   r   self_outputsattention_outputr   s               rH   ri   zTFMobileBertAttention.call  s`     yy*lNIO`ks ! 
  11,q/;Ya1b#%QR(88rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr=   r  )rl   rm   r7   rn   r=   rW   ro   r  rq   s     rH   ro   zTFMobileBertAttention.build  s    ::
4&2tyy~~. &		%&4,d3?t55::; 3&&,,T23 3 @& &3 3   C%CCC r   rf   )rM   rN   rO   rY   r  ri   ro   ru   rv   s   @rH   r   r     s    O
" &	3rJ   r   c                  .     e Zd Z fdZddZddZ xZS )TFOutputBottleneckc                V   t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        t        j                  j                  |j                        | _        || _        y NrU   rV   r   r   rQ   )rX   rY   r   rZ   r[   r   rU   r   r   r   r   r   r   r   ra   rb   s      rH   rY   zTFOutputBottleneck.__init__  s    "6"\\''(:(:'I
 !:!:;(=(=K
 ||++F,F,FGrJ   c                v    | j                  |      }| j                  ||      }| j                  ||z         }|S r   )rU   r   r   )r=   rh   r   r   layer_outputss        rH   ri   zTFOutputBottleneck.call  s;    

=1]XF}'FGrJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   r   rq   s     rH   ro   zTFOutputBottleneck.build  r   r   r   rf   rt   rv   s   @rH   r  r    s    	+rJ   r  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertOutputc                   t        |   di | |j                  | _        t        j                  j                  |j                  t        |j                        d      | _	        t        |j                     |j                  |j                  d      | _        | j                  s6t        j                  j                  |j                        | _        || _        y t#        |d      | _        || _        y )NrU   r   r   r   
bottleneckrV   rQ   )rX   rY   r   r   rZ   r[   rp   r   r   rU   r   r   r   r   r   r   r   r  r  ra   rb   s      rH   rY   zTFMobileBertOutput.__init__  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDL  1lKDOrJ   c                    | j                  |      }| j                  s)| j                  ||      }| j                  ||z         }|S | j                  ||z         }| j	                  ||      }|S r   )rU   r   r   r   r  )r=   rh   residual_tensor_1residual_tensor_2r   s        rH   ri   zTFMobileBertOutput.call  st    

=1"" LLLJM NN=;L+LMM  !NN=;L+LMM OOM;LMMrJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTrU   r   r  )rl   rm   r7   rn   rU   rW   ro   ra   r\   r   r  rq   s     rH   ro   zTFMobileBertOutput.build  s   ::
4$'3tzz/ N

  $dkk.K.K!LMN4d+7t~~223 +$$T*+4t,8t334 ,%%d+, , 9N N+ +, ,s$   3D<<EE<EEEr   rf   rt   rv   s   @rH   r  r    s    ,rJ   r  c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckLayerc                    t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        || _        y r  )rX   rY   r   rZ   r[   intra_bottleneck_sizerU   r   r   r   r   ra   rb   s      rH   rY   zTFBottleneckLayer.__init__  sg    "6"\\''(D(D7'S
 !:!:;((&2G2Gk
 rJ   c                J    | j                  |      }| j                  |      }|S rf   rU   r   )r=   r   rh   s      rH   ri   zTFBottleneckLayer.call  s$    

6*}5rJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   
rl   rm   r7   rn   rU   rW   ro   ra   r   r   rq   s     rH   ro   zTFBottleneckLayer.build      ::
4$'3tzz/ H

  $dkk.E.E!FGH4d+7t~~223 +$$T*+ + 8H H+ +r   rf   rt   rv   s   @rH   r  r        
	+rJ   r  c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckc                    t        |   di | |j                  | _        |j                  | _        t	        |d      | _        | j                  rt	        |d      | _        y y )NinputrV   	attentionrQ   )rX   rY   key_query_shared_bottleneckr   r  bottleneck_inputr*  rb   s      rH   rY   zTFBottleneck.__init__  sZ    "6"+1+M+M((.(G(G% 1&w G++.vKHDN ,rJ   c                    | j                  |      }| j                  r|fdz  S | j                  r| j                  |      }||||fS ||||fS )N   )r,  r   r+  r*  )r=   rh   bottlenecked_hidden_statesshared_attention_inputs       rH   ri   zTFBottleneck.call&  se    " &*%:%:=%I"((.0144--%)^^M%B"*,BMSmnn!=-A[\\rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr,  r*  )rl   rm   r7   rn   r,  rW   ro   r*  rq   s     rH   ro   zTFBottleneck.build@  s    ::
4+T2>t4499: 2%%++D124d+7t~~223 +$$T*+ + 82 2+ +r  rf   rt   rv   s   @rH   r'  r'    s    I]4	+rJ   r'  c                  ,     e Zd Z fdZd ZddZ xZS )TFFFNOutputc                    t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        || _        y r  )rX   rY   r   rZ   r[   rp   rU   r   r   r   r   ra   rb   s      rH   rY   zTFFFNOutput.__init__M  sg    "6"\\''(?(?g'N
 !:!:;##V-B-B
 rJ   c                P    | j                  |      }| j                  ||z         }|S rf   r!  )r=   rh   r   s      rH   ri   zTFFFNOutput.callU  s)    

=1}'FGrJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   )
rl   rm   r7   rn   rU   rW   ro   ra   r\   r   rq   s     rH   ro   zTFFFNOutput.buildZ  s    ::
4$'3tzz/ N

  $dkk.K.K!LMN4d+7t~~223 +$$T*+ + 8N N+ +r   rf   rt   rv   s   @rH   r3  r3  L  r%  rJ   r3  c                  ,     e Zd Z fdZd ZddZ xZS )
TFFFNLayerc                l    t        |   di | t        |d      | _        t	        |d      | _        y )NintermediaterV   r   rQ   )rX   rY   rS   r:  r3  r  rb   s      rH   rY   zTFFFNLayer.__init__g  s1    "6"4V.Q!,V(!CrJ   c                L    | j                  |      }| j                  ||      }|S rf   )r:  r  )r=   rh   intermediate_outputr  s       rH   ri   zTFFFNLayer.calll  s,    "//>../BMRrJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr:  r  )rl   rm   r7   rn   r:  rW   ro   r  rq   s     rH   ro   zTFFFNLayer.buildq  s    ::
4.:t00556 .!!''-.4,d3?t55::; 3&&,,T23 3 @. .3 3r  rf   rt   rv   s   @rH   r8  r8  f  s    D

	3rJ   r8  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertLayerc                   t        |   di | |j                  | _        |j                  | _        t	        |d      | _        t        |d      | _        t        |d      | _	        | j                  rt        |d      | _        |j                  dkD  r:t        |j                  dz
        D cg c]  }t        |d|        c}| _        y y c c}w )	Nr*  rV   r:  r   r  r$   zffn.rQ   )rX   rY   r   num_feedforward_networksr   r*  rS   r:  r  r  r'  r  r   r8  ffnr=   ra   rc   ird   s       rH   rY   zTFMobileBertLayer.__init__~  s    "6"$33(.(G(G%.vKH4V.Q!3F!J*6EDO**Q.EJ6KjKjmnKnEop
6$qc
;pDH /ps   0Cc           
        | j                   r| j                  |      \  }}}}	n|gdz  \  }}}}	| j                  ||||	||||      }
|
d   }|f}| j                  dk7  r+t	        | j
                        D ]  \  }} ||      }||fz  } | j                  |      }| j                  ||||      }|f|
dd  z   t        j                  d      ||||	||fz   |z   }|S )Nr.  r   r   r$   )
r   r  r*  rA  	enumeraterB  r:  r  r7   constant)r=   rh   r   r   r   r   r   r   r   r  attention_outputsr
  srD  
ffn_moduler<  layer_outputr   s                     rH   ri   zTFMobileBertLayer.call  sD   BF//R_B`?L*lKCP/TUBU?L*lK NN + 	
 -Q/((A-!*488!4 ):#-.>#? &(() #//0@A--.ACSUbmu-v O#$ A #
  	 rJ   c                (   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   qxY w# 1 sw Y   $xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   {xY w)NTr*  r:  r  r  rB  )rl   rm   r7   rn   r*  rW   ro   r:  r  r  rB  r=   rr   layers      rH   ro   zTFMobileBertLayer.build  s   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4,d3?t55::; 3&&,,T234t,8t334 ,%%d+,4%1 &]]5::. &KK%& && 2+ +. .3 3, ,& &s<   G%G#?G0G<8HG #G-0G9<HH	r   rf   rt   rv   s   @rH   r?  r?  }  s    q+Z&rJ   r?  c                  0     e Zd Z fdZ	 ddZddZ xZS )TFMobileBertEncoderc                    t        |   di | |j                  | _        |j                  | _        t	        |j
                        D cg c]  }t        |d|        c}| _        y c c}w )Nzlayer_._rV   rQ   )rX   rY   r   output_hidden_statesr   num_hidden_layersr?  rN  rC  s       rH   rY   zTFMobileBertEncoder.__init__  s^    "6"!'!9!9$*$?$?!NSTZTlTlNmn'xs^Dn
ns   
A*c                    |rdnd }|rdnd }	t        | j                        D ].  \  }
}|r||fz   } |||||
   ||      }|d   }|s&|	|d   fz   }	0 |r||fz   }|st        d |||	fD              S t        |||	      S )NrQ   r   r   r$   c              3  &   K   | ]	  }||  y wrf   rQ   ).0vs     rH   	<genexpr>z+TFMobileBertEncoder.call.<locals>.<genexpr>  s     hqZ[Zghs   )last_hidden_staterh   
attentions)rF  rN  tupler   )r=   rh   r   r   r   rR  return_dictr   all_hidden_statesall_attentionsrD  layer_moduler  s                rH   ri   zTFMobileBertEncoder.call  s     #7BD0d(4 	FOA|#$58H$H!(~y|=NYaM *!,M !/=3C2E!E	F   1]4D Dh]4E~$Vhhh +;LYg
 	
rJ   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY w)NTrN  )rl   rm   rN  r7   rn   rW   ro   rM  s      rH   ro   zTFMobileBertEncoder.build  sp    ::
4$'3 &]]5::. &KK%& && 4& &s   A..A7	r   rf   rt   rv   s   @rH   rP  rP    s    o !
F&rJ   rP  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertPoolerc                    t        |   di | |j                  | _        | j                  rEt        j
                  j                  |j                  t        |j                        dd      | _
        || _        y )NtanhrU   )r   
activationrW   rQ   )rX   rY   classifier_activationdo_activater   rZ   r[   r   r   r   rU   ra   rb   s      rH   rY   zTFMobileBertPooler.__init__  sk    "6"!77++""#263K3K#L!	 , DJ rJ   c                V    |d d df   }| j                   s|S | j                  |      }|S Nr   )rg  rU   )r=   rh   first_token_tensorpooled_outputs       rH   ri   zTFMobileBertPooler.call  s7     +1a40%% JJ'9:M  rJ   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wrk   )	rl   rm   r7   rn   rU   rW   ro   ra   r   rq   s     rH   ro   zTFMobileBertPooler.build  s}    ::
4$'3tzz/ H

  $dkk.E.E!FGH H 4H Hrs   rf   rt   rv   s   @rH   rb  rb    s    
!HrJ   rb  c                  ,     e Zd Z fdZd ZddZ xZS )#TFMobileBertPredictionHeadTransformc                   t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        t        |j                  t              rt        |j                        | _        n|j                  | _        t        d   |j
                  |j                  d      | _        || _        y )NrU   r   r   r   r   rQ   )rX   rY   r   rZ   r[   r   r   r   rU   r]   r^   r_   r   transform_act_fnr   r   r   ra   rb   s      rH   rY   z,TFMobileBertPredictionHeadTransform.__init__#  s    "6"\\''?6C[C[3\cj ( 

 f''-$5f6G6G$HD!$*$5$5D! .v/A/A6K`K`grsrJ   c                l    | j                  |      }| j                  |      }| j                  |      }|S rf   )rU   rp  r   rg   s     rH   ri   z(TFMobileBertPredictionHeadTransform.call/  s4    

=1--m<}5rJ   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   r#  rq   s     rH   ro   z)TFMobileBertPredictionHeadTransform.build5  r$  r   rf   rt   rv   s   @rH   rn  rn  "  s    
	+rJ   rn  c                  D     e Zd Z fdZddZd Zd Zd Zd Zd Z	 xZ
S )	TFMobileBertLMPredictionHeadc                V    t        |   di | t        |d      | _        || _        y )N	transformrV   rQ   )rX   rY   rn  rv  ra   rb   s      rH   rY   z%TFMobileBertLMPredictionHead.__init__B  s(    "6"<V+VrJ   c                   | j                  | j                  j                  fddd      | _        | j                  | j                  j                  | j                  j
                  z
  | j                  j                  fddd      | _        | j                  | j                  j                  | j                  j
                  fddd      | _        | j                  ry d| _        t        | dd       Nt        j                  | j                  j                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)Nr   Tr   )r   r   	trainablerW   zdense/weightzdecoder/weightrv  )r   ra   r   r   r   r   rU   decoderrl   rm   r7   rn   rv  rW   ro   rq   s     rH   ro   z"TFMobileBertLMPredictionHead.buildG  s   OO4;;+A+A*CQXdhouOv	__;;**T[[-G-GGI_I_`	 % 

 ;;))4;;+E+EF!	 ' 
 ::
4d+7t~~223 +$$T*+ + 8+ +s   D??Ec                    | S rf   rQ   r=   s    rH   get_output_embeddingsz2TFMobileBertLMPredictionHead.get_output_embeddings]  s    rJ   c                L    || _         t        |      d   | j                  _        y ri  )ry  r   ra   r   r=   r   s     rH   set_output_embeddingsz2TFMobileBertLMPredictionHead.set_output_embeddings`  s    !+E!21!5rJ   c                    d| j                   iS )Nr   )r   r{  s    rH   get_biasz%TFMobileBertLMPredictionHead.get_biasd  s    		""rJ   c                X    |d   | _         t        |d         d   | j                  _        y )Nr   r   )r   r   ra   r   r~  s     rH   set_biasz%TFMobileBertLMPredictionHead.set_biasg  s'    &M	!+E&M!:1!=rJ   c                    | j                  |      }t        j                  |t        j                  t        j                  | j
                        | j                  gd            }|| j                  z   }|S )Nr   r   )rv  r7   r   r   r   ry  rU   r   rg   s     rH   ri   z!TFMobileBertLMPredictionHead.callk  sY    }5		-BLL<VX\XbXb;cjk1lm%		1rJ   rf   )rM   rN   rO   rY   ro   r|  r  r  r  ri   ru   rv   s   @rH   rt  rt  A  s&    
+,6#>rJ   rt  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertMLMHeadc                H    t        |   di | t        |d      | _        y )NpredictionsrV   rQ   )rX   rY   rt  r  rb   s      rH   rY   zTFMobileBertMLMHead.__init__s  s"    "6"7]SrJ   c                (    | j                  |      }|S rf   r  )r=   sequence_outputprediction_scoress      rH   ri   zTFMobileBertMLMHead.callw  s     ,,_=  rJ   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )rl   rm   r7   rn   r  rW   ro   rq   s     rH   ro   zTFMobileBertMLMHead.build{  sm    ::
4-9t//445 -  &&t,- - :- -   A11A:rf   rt   rv   s   @rH   r  r  r  s    T!-rJ   r  c                  d     e Zd ZeZd fd	Zd Zd Zd Ze		 	 	 	 	 	 	 	 	 	 dd       Z
d	dZ xZS )
TFMobileBertMainLayerc                :   t        |   di | || _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        t        |d      | _	        t        |d      | _        |rt        |d      | _        y d | _        y )Nr   rV   encoderpoolerrQ   )rX   rY   ra   rS  r   rR  use_return_dictr\  r   r   rP  r  rb  r  )r=   ra   add_pooling_layerrc   rd   s       rH   rY   zTFMobileBertMainLayer.__init__  s    "6"!'!9!9!'!9!9$*$?$?!!110lK*6	BCT(h?Z^rJ   c                    | j                   S rf   )r   r{  s    rH   get_input_embeddingsz*TFMobileBertMainLayer.get_input_embeddings  s    rJ   c                `    || j                   _        t        |      d   | j                   _        y ri  )r   r   r   r   r~  s     rH   set_input_embeddingsz*TFMobileBertMainLayer.set_input_embeddings  s$    !&%/%6q%9"rJ   c                    t         )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r  )r=   heads_to_prunes     rH   _prune_headsz"TFMobileBertMainLayer._prune_heads  s
    
 "!rJ   c           	     `   ||t        d      |t        |      }n|t        |      d d }nt        d      |t        j                  |d      }|t        j                  |d      }| j	                  |||||
      }t        j
                  ||d   dd|d   f      }t        j                  ||j                        }t        j                  d|j                        }t        j                  d	|j                        }t        j                  t        j                  ||      |      }|t        d g| j                  z  }| j                  ||||||	|
      }|d   }| j                  | j                  |      nd }|	s
||f|dd  z   S t        |||j                   |j"                  
      S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr$   r   r   r0   g      ?g     )rY  pooler_outputrh   rZ  )r   r   r7   r   r   r<   r:   r1   rG  multiplysubtractr  rS  r  r  r   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r   rr   embedding_outputextended_attention_maskone_cstten_thousand_cstencoder_outputsr  rk  s                      rH   ri   zTFMobileBertMainLayer.call  s     ]%>cdd"$Y/K&$]3CR8KTUU!WW[!4N!WW[!4N??9lNTalt?u #%**^k!naQRT_`aTb=c"d #%''*AIYI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'CZ.[]m"n  %%!7!77I,,#  ' 
 *!,8<8OO4UY  #$ $
 ,-')77&11	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTr   r  r  )	rl   rm   r7   rn   r   rW   ro   r  r  rq   s     rH   ro   zTFMobileBertMainLayer.build  s   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()44(4t{{//0 (!!$'( ( 5, ,) )( ($   D%%D1?D=%D.1D:=E)T
NNNNNNNNNFrf   )rM   rN   rO   r%   config_classrY   r  r  r  r   ri   ro   ru   rv   s   @rH   r  r    sY    #L_:"  !Q
 Q
f(rJ   r  c                      e Zd ZdZeZdZy)TFMobileBertPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
mobilebertN)rM   rN   rO   rP   r%   r  base_model_prefixrQ   rJ   rH   r  r    s    
 $L$rJ   r  c                  X    e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	ded	<   y)
 TFMobileBertForPreTrainingOutputaE  
    Output type of [`TFMobileBertForPreTraining`].

    Args:
        prediction_logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`tf.Tensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nztf.Tensor | Nonelossprediction_logitsseq_relationship_logitsztuple[tf.Tensor] | Nonerh   rZ  )
rM   rN   rO   rP   r  __annotations__r  r  rh   rZ  rQ   rJ   rH   r  r    sB    , "D
!*.'.04-4-1M*1*.J'.rJ   r  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`Numpy array` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False`):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Z fdZe eej                  d             ee	e
e      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	TFMobileBertModelc                P    t        |   |g|i | t        |d      | _        y )Nr  rV   )rX   rY   r  r  r=   ra   r   rc   rd   s       rH   rY   zTFMobileBertModel.__init__  s(    3&3F3/\JrJ   batch_size, sequence_length
checkpointoutput_typer  c                <    | j                  |||||||||	|

      }|S )N)
r   r   r   r   r   r   r   rR  r\  r   )r  )r=   r   r   r   r   r   r   r   rR  r\  r   r   s               rH   ri   zTFMobileBertModel.call  s<    ( //))%'/!5# " 
 rJ   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )rl   rm   r7   rn   r  rW   ro   rq   s     rH   ro   zTFMobileBertModel.build  si    ::
4t,8t334 ,%%d+, , 9, ,r  r  )r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   bool | NonerR  r  r\  r  r   r  rL   z$tuple | TFBaseModelOutputWithPoolingrf   )rM   rN   rO   rY   r   r!   MOBILEBERT_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCri   ro   ru   rv   s   @rH   r  r    s    
K *+F+M+MNk+lm&0$ .28<8<6:377;)-,0#' %* 6 6	
 4 1 5 ' * !  
. n 8,rJ   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                       e Zd Z fdZd Zd Ze eej                  d             e
ee      	 	 	 	 	 	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
d                     ZddZd Z xZS )TFMobileBertForPreTrainingc                    t        |   |g|i | t        |d      | _        t	        |d      | _        t        |d      | _        y )Nr  rV   predictions___clsseq_relationship___cls)rX   rY   r  r  r  r  TFMobileBertOnlyNSPHeadseq_relationshipr  s       rH   rY   z#TFMobileBertForPreTraining.__init__  sH    3&3F3/\J.v<OP 7E] ^rJ   c                .    | j                   j                   S rf   r  r{  s    rH   get_lm_headz&TFMobileBertForPreTraining.get_lm_head      +++rJ   c                    t        j                  dt               | j                  dz   | j                  j                  z   dz   | j                  j                  j                  z   S NzMThe method get_prefix_bias_name is deprecated. Please use `get_bias` instead./)warningswarnFutureWarningrW   r  r{  s    rH   get_prefix_bias_namez/TFMobileBertForPreTraining.get_prefix_bias_name  sM    egtuyy3!1!1!6!66<t?O?O?[?[?`?```rJ   r  r  r  c                H   | j                  |||||||||	|
      }|dd \  }}| j                  |      }| j                  |      }d}|
 |d|
i}||d<   | j                  |||f      }|	s||f|dd z   }||f|z   S |S t	        ||||j
                  |j                        S )a9  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForPreTraining

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
        >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        >>> outputs = model(input_ids)
        >>> prediction_scores, seq_relationship_scores = outputs[:2]
        ```	r   r   r   r   r   r   rR  r\  r   Nr   r-   r2   r-   r>   )r  r  r  rh   rZ  )r  r  r  rI   r  rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r-   r2   r   r   r  rk  r  seq_relationship_score
total_lossd_labelsr   s                        rH   ri   zTFMobileBertForPreTraining.call  s   B //))%'/!5# " 
 *1!& ,,_=!%!6!6}!E
"5"A &)H.AH*+--XGXZpFq-rJ')?@712;NF/9/EZMF*Q6Q//$:!//))
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTr  r  r  )	rl   rm   r7   rn   r  rW   ro   r  r  rq   s     rH   ro   z TFMobileBertForPreTraining.build  s
   ::
4t,8t334 ,%%d+,4-9t//445 -  &&t,-4+T2>t4499: 2%%++D12 2 ?, ,- -2 2r  c                    |dk(  r|dfS |fS Nzcls.predictions.decoder.weightz,mobilebert.embeddings.word_embeddings.weightrQ   r=   	tf_weights     rH   tf_to_pt_weight_renamez1TFMobileBertForPreTraining.tf_to_pt_weight_rename-      88LLL<rJ   NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r-   r  r2   r  r   r  rL   z(tuple | TFMobileBertForPreTrainingOutputrf   )rM   rN   rO   rY   r  r  r   r!   r  r  r#   r  r  ri   ro   r  ru   rv   s   @rH   r  r    s   _,a *+F+M+MNk+lm+KZij .28<8<6:377;)-,0#'04=A %?
*?
 6?
 6	?

 4?
 1?
 5?
 '?
 *?
 !?
 .?
 ;?
 ?
 
2?
 k n ?
B2 rJ   r  z8MobileBert Model with a `language modeling` head on top.c            	           e Zd Zg dZ fdZd Zd Ze ee	j                  d             eeeedd      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	                     Zdd
Zd Z xZS )TFMobileBertForMaskedLM)r  r  cls.seq_relationshipc                v    t        |   |g|i | t        |dd      | _        t	        |d      | _        y )NFr  r  rW   r  rV   )rX   rY   r  r  r  r  r  s       rH   rY   z TFMobileBertForMaskedLM.__init__=  s;    3&3F3/%Vbc.v<OPrJ   c                .    | j                   j                   S rf   r  r{  s    rH   r  z#TFMobileBertForMaskedLM.get_lm_headC  r  rJ   c                    t        j                  dt               | j                  dz   | j                  j                  z   dz   | j                  j
                  j                  z   S r  )r  r  r  rW   mlmr  r{  s    rH   r  z,TFMobileBertForMaskedLM.get_prefix_bias_nameF  sG    egtuyy3.4txx7K7K7P7PPPrJ   r  z'paris'g=
ףp=?r  r  r  expected_outputexpected_lossc                   | j                  |||||||||	|
      }|d   }| j                  ||      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j
                        S )az  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels
        r  r   r   Nr   r  r>   rh   rZ  )r  r  rI   r	   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r-   r   r   r  r  r  r   s                    rH   ri   zTFMobileBertForMaskedLM.callJ  s    : //))%'/!5# " 
 "!* ,,_x,P~t4+?+?HY+Z')GABK7F)-)9TGf$EvE$!//))	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )rl   rm   r7   rn   r  rW   ro   r  rq   s     rH   ro   zTFMobileBertForMaskedLM.build  s    ::
4t,8t334 ,%%d+,4-9t//445 -  &&t,- - :, ,- -r  c                    |dk(  r|dfS |fS r  rQ   r  s     rH   r  z.TFMobileBertForMaskedLM.tf_to_pt_weight_rename  r  rJ   NNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r-   r  r   r  rL   ztuple | TFMaskedLMOutputrf   )rM   rN   rO   "_keys_to_ignore_on_load_unexpectedrY   r  r  r   r!   r  r  r   r  r	   r  ri   ro   r  ru   rv   s   @rH   r  r  4  s   *&Q,Q *+F+M+MNk+lm&$$! .28<8<6:377;)-,0#'04 %.
*.
 6.
 6	.

 4.
 1.
 5.
 '.
 *.
 !.
 ..
 .
 
".
 n .
`	- rJ   r  c                  ,     e Zd Z fdZd ZddZ xZS )r  c                ~    t        |   di | t        j                  j	                  dd      | _        || _        y )Nr   r  rV   rQ   )rX   rY   r   rZ   r[   r  ra   rb   s      rH   rY   z TFMobileBertOnlyNSPHead.__init__  s7    "6" % 2 21;M 2 NrJ   c                (    | j                  |      }|S rf   )r  )r=   rk  r  s      rH   ri   zTFMobileBertOnlyNSPHead.call  s    !%!6!6}!E%%rJ   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY w)NTr  )	rl   rm   r7   rn   r  rW   ro   ra   r   rq   s     rH   ro   zTFMobileBertOnlyNSPHead.build  s    ::
4+T2>t4499: S%%++T49P9P,QRS S ?S Srs   rf   rt   rv   s   @rH   r  r    s    
&SrJ   r  zPMobileBert Model with a `next sentence prediction (classification)` head on top.c                       e Zd ZddgZ fdZe eej                  d             e	e
e      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )%TFMobileBertForNextSentencePredictionr  cls.predictionsc                t    t        |   |g|i | t        |d      | _        t	        |d      | _        y )Nr  rV   r  )rX   rY   r  r  r  clsr  s       rH   rY   z.TFMobileBertForNextSentencePrediction.__init__  s7    3&3F3/\J*68PQrJ   r  r  c                   | j                  |||||||||	|
      }|d   }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j
                        S )a  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForNextSentencePrediction

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")

        >>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
        ```r  r$   Nr  r   r  )r  r  rI   r   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r2   r   r   rk  seq_relationship_scoresnext_sentence_lossr   s                    rH   ri   z*TFMobileBertForNextSentencePrediction.call  s    F //))%'/!5# " 
  
"&((="9 #* %%-@I`%a 	 -/'!"+=F7I7U')F2a[aa,#*!//))	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )rl   rm   r7   rn   r  rW   ro   r  rq   s     rH   ro   z+TFMobileBertForNextSentencePrediction.build  s    ::
4t,8t334 ,%%d+,4%1txx}}- %t$% % 2, ,% %r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r2   r  r   r  rL   z%tuple | TFNextSentencePredictorOutputrf   )rM   rN   rO   r  rY   r   r!   r  r  r#   r   r  ri   ro   ru   rv   s   @rH   r  r    s     +?@R)S&R *+F+M+MNk+lm+HWfg .28<8<6:377;)-,0#'=A %>
*>
 6>
 6	>

 4>
 1>
 5>
 '>
 *>
 !>
 ;>
 >
 
/>
 h n >
@	%rJ   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c            	           e Zd Zg dZdgZ fdZe eej                  d             e
eeeee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )%TFMobileBertForSequenceClassificationr  r  r  r  r   c                   t        |   |g|i | |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  j                  |      | _
        t        j                  j                  |j                  t        |j                        d      | _        || _        y )Nr  rV   
classifierr   rX   rY   
num_labelsr  r  classifier_dropoutr   r   rZ   r   r   r[   r   r   r  ra   r=   ra   r   rc   r
  rd   s        rH   rY   z.TFMobileBertForSequenceClassification.__init__  s    3&3F3 ++/\J)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rJ   r  r  c                &   | j                  |||||||||	|
      }|d   }| j                  ||      }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t	        |||j
                  |j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        r  r$   r   Nr   r  )r  r   r  rI   r   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r-   r   r   rk  r>   r  r   s                    rH   ri   z*TFMobileBertForSequenceClassification.call$  s    : //))%'/!5# " 
  
]XF/~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wNTr  r  
rl   rm   r7   rn   r  rW   ro   r  ra   r   rq   s     rH   ro   z+TFMobileBertForSequenceClassification.build_      ::
4t,8t334 ,%%d+,4t,8t334 M%%tT4;;3J3J&KLM M 9, ,M M   C"%3C."C+.C7r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r-   r  r   r  rL   z"tuple | TFSequenceClassifierOutputrf   )rM   rN   rO   r  _keys_to_ignore_on_load_missingrY   r   r!   r  r  r   '_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATIONr   r  _SEQ_CLASS_EXPECTED_OUTPUT_SEQ_CLASS_EXPECTED_LOSSri   ro   ru   rv   s   @rH   r  r    s   *& (2l# *+F+M+MNk+lm:.$2. .28<8<6:377;)-,0#'04 %0
*0
 60
 6	0

 40
 10
 50
 '0
 *0
 !0
 .0
 0
 
,0
 n 0
d	MrJ   r  z
    MobileBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                       e Zd Zg dZ fdZe eej                  d             e	e
eeeeee      	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
 TFMobileBertForQuestionAnsweringr  r  r  r  r  c                
   t        |   |g|i | |j                  | _        t        |dd      | _        t
        j                  j                  |j                  t        |j                        d      | _
        || _        y )NFr  r  
qa_outputsr   )rX   rY   r	  r  r  r   rZ   r[   r   r   r  ra   r  s       rH   rY   z)TFMobileBertForQuestionAnswering.__init__|  su    3&3F3 ++/%Vbc,,,,/&BZBZ2[bn - 
 rJ   r  )r  r  r  qa_target_start_indexqa_target_end_indexr  r  c                   | j                  |||||||||	|
      }|d   }| j                  |      }t        j                  |dd      \  }}t        j                  |d      }t        j                  |d      }d}|
||
|d}| j                  |||f      }|	s||f|dd z   }||f|z   S |S t        ||||j                  |j                        S )	a  
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        r  r   r   r   r   N)start_positionend_position)r  start_logits
end_logitsrh   rZ  )	r  r  r7   splitsqueezerI   r   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  start_positionsend_positionsr   r   r  r>   r   r!  r  r-   r   s                        rH   ri   z%TFMobileBertForQuestionAnswering.call  s   H //))%'/!5# " 
 "!*1#%88FAB#? jzz,R8ZZ
4
&=+D(7WF''z0JKD"J/'!"+=F)-)9TGf$EvE-%!!//))
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY w)NTr  r  )
rl   rm   r7   rn   r  rW   ro   r  ra   r   rq   s     rH   ro   z&TFMobileBertForQuestionAnswering.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r$  r  r%  r  r   r  rL   z&tuple | TFQuestionAnsweringModelOutputrf   )rM   rN   rO   r  rY   r   r!   r  r  r   _CHECKPOINT_FOR_QAr   r  _QA_TARGET_START_INDEX_QA_TARGET_END_INDEX_QA_EXPECTED_OUTPUT_QA_EXPECTED_LOSSri   ro   ru   rv   s   @rH   r  r  k  s   *& *+F+M+MNk+lm%2$40+' .28<8<6:377;)-,0#'9=7; %;
*;
 6;
 6	;

 4;
 1;
 5;
 ';
 *;
 !;
 7;
 5;
 ;
 
0;
 n ;
z	MrJ   r  z
    MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    c                       e Zd Zg dZdgZ fdZe eej                  d             e
eee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFMobileBertForMultipleChoicer  r   c                .   t        |   |g|i | t        |d      | _        t        j
                  j                  |j                        | _        t        j
                  j                  dt        |j                        d      | _        || _        y )Nr  rV   r$   r  r   )rX   rY   r  r  r   rZ   r   r   r   r[   r   r   r  ra   r  s       rH   rY   z&TFMobileBertForMultipleChoice.__init__  s{    3&3F3/\J||++F,F,FG,,,,/&2J2J"KR^ - 
 rJ   z(batch_size, num_choices, sequence_lengthr  c                   |t        |      d   }t        |      d   }nt        |      d   }t        |      d   }|t        j                  |d|f      nd}|t        j                  |d|f      nd}|t        j                  |d|f      nd}|t        j                  |d|f      nd}|%t        j                  |d|t        |      d   f      nd}| j                  |||||||||	|
      }|d   }| j	                  ||      }| j                  |      }t        j                  |d|f      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )	a5  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
            where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
        Nr$   r   r   r   )r\  r   r   r  )
r   r7   r<   r  r   r  rI   r
   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r-   r   num_choices
seq_lengthflat_input_idsflat_attention_maskflat_token_type_idsflat_position_idsflat_inputs_embedsr   rk  r>   reshaped_logitsr  r   s                            rH   ri   z"TFMobileBertForMultipleChoice.call  s   8  $Y/2K#I.q1J$]3A6K#M215JDMDYIJ/?@_cN\Nhbjj"j9IJnrN\Nhbjj"j9IJnrJVJbBJJ|b*5EFhl ( JJ}r:z-7PQR7S&TU 	
 // # " 
  
]XF/**Vb+->?~t4+?+?+X%''!"+5F)-)9TGf$EvE*"!//))	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  r  rq   s     rH   ro   z#TFMobileBertForMultipleChoice.build?  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r-   r  r   r  rL   z#tuple | TFMultipleChoiceModelOutputrf   )rM   rN   rO   r  r  rY   r   r!   r  r  r   r  r
   r  ri   ro   ru   rv   s   @rH   r-  r-    s   *& (2l# *#**+UV  &/$ .28<8<6:377;)-,0#'04 %?
*?
 6?
 6	?

 4?
 1?
 5?
 '?
 *?
 !?
 .?
 ?
 
-?
 ?
B	MrJ   r-  z
    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    c            	           e Zd Zg dZdgZ fdZe eej                  d             e
eeeee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )"TFMobileBertForTokenClassificationr  r   c                   t        |   |g|i | |j                  | _        t        |dd      | _        |j
                  |j
                  n|j                  }t        j                  j                  |      | _
        t        j                  j                  |j                  t        |j                        d      | _        || _        y )NFr  r  r  r   r  r  s        rH   rY   z+TFMobileBertForTokenClassification.__init__]  s    3&3F3 ++/%Vbc)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rJ   r  r  c                &   | j                  |||||||||	|
      }|d   }| j                  ||      }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t	        |||j
                  |j                        S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        r  r   r   Nr   r  )r  r   r  rI   r   rh   rZ  )r=   r   r   r   r   r   r   r   rR  r\  r-   r   r   r  r>   r  r   s                    rH   ri   z'TFMobileBertForTokenClassification.callk  s    6 //))%'/!5# " 
 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
rJ   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  r  rq   s     rH   ro   z(TFMobileBertForTokenClassification.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rR  r  r\  r  r-   r  r   r  rL   ztuple | TFTokenClassifierOutputrf   )rM   rN   rO   r  r  rY   r   r!   r  r  r   $_CHECKPOINT_FOR_TOKEN_CLASSIFICATIONr   r  _TOKEN_CLASS_EXPECTED_OUTPUT_TOKEN_CLASS_EXPECTED_LOSSri   ro   ru   rv   s   @rH   r:  r:  K  s   *& (2l# *+F+M+MNk+lm7+$40 .28<8<6:377;)-,0#'04 %.
*.
 6.
 6	.

 4.
 1.
 5.
 '.
 *.
 !.
 ..
 .
 
).
 n .
`	MrJ   r:  )
r  r-  r  r  r  r  r:  r  r  r  )frP   
__future__r   r  dataclassesr   numpynp
tensorflowr7   activations_tfr   modeling_tf_outputsr   r   r	   r
   r   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   r   r   r   tf_utilsr   r   r   utilsr   r   r    r!   r"   r#   configuration_mobilebertr%   
get_loggerrM   loggerr  r  r>  r?  r@  r'  r*  r+  r(  r)  r  r  r  r)   rZ   LayerrS   LayerNormalizationrx   r   r   r   r   r   r   r  r  r  r'  r3  r8  r?  rP  rb  rn  rt  r  r  r  r  MOBILEBERT_START_DOCSTRINGr  r  r  r  r  r  r  r  r-  r:  __all__rQ   rJ   rH   <module>rR     s     "  !   /	 	 	    S R  7 
		H	%1 $ (L $l !  = '     +E '' ! Q Q8Mu||11 M64%,,11 40u||!! 0 %
:e U\\// e Pb 2 2 bJ+U\\// +B%3ELL.. %3P+++ +8&,++ &,R+** +4,+5<<%% ,+^+%,,$$ +43## 3.N&** N&b1&%,,,, 1&hH++ H@+%,,*<*< +>.5<<#5#5 .b-%,,,, -$ ~(ELL.. ~( ~(B%"3 % /{ / /<( T5 p j.,3 .,	.,b  d !<>Y d d N TVpq] 9;W ]  r] @Sell00 S& ZV%,GIe V%	V%r  \M,GIe \M\M~  eM'BD[ eMeMP  gM$?AU gMgMT  [M)DF_ [M[M|rJ   