
    rh                        d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZmZmZ ddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZm Z m!Z! ddl"m#Z#  e!jH                  e%      Z&d Z' G d de	jP                        Z)e	jT                  e)dZ+ G d de	jP                        Z, G d de	jP                        Z- G d de	jP                        Z. G d de	jP                        Z/ G d de	jP                        Z0 G d de	jP                        Z1 G d de	jP                        Z2 G d  d!e	jP                        Z3 G d" d#e	jP                        Z4 G d$ d%e	jP                        Z5 G d& d'e	jP                        Z6 G d( d)e	jP                        Z7 G d* d+e	jP                        Z8 G d, d-e	jP                        Z9 G d. d/e	jP                        Z: G d0 d1e	jP                        Z; G d2 d3e	jP                        Z< G d4 d5e	jP                        Z=e  G d6 d7e             Z>e e d89       G d: d;e                    Z?e  G d< d=e>             Z@ e d>9       G d? d@e>             ZAe  G dA dBe>             ZB G dC dDe	jP                        ZC e dE9       G dF dGe>             ZD e dH9       G dI dJe>             ZEe  G dK dLe>             ZFe  G dM dNe>             ZGe  G dO dPe>             ZHg dQZIy)R    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringlogging   )MobileBertConfigc           	         	 ddl }ddl}ddl}t        j                  j                  |      }t        j                  d|        |j                  j                  |      }g }g }	|D ]^  \  }
}t        j                  d|
 d|        |j                  j                  ||
      }|j                  |
       |	j                  |       ` t        ||	      D ]  \  }
}|
j                  dd      }
|
j                  d	d
      }
|
j                  dd      }
|
j                  dd      }
|
j!                  d      }
t#        d |
D              r(t        j                  ddj%                  |
              | }|
D ]  }|j'                  d|      r|j!                  d|      }n|g}|d   dk(  s|d   dk(  rt)        |d      }nW|d   dk(  s|d   dk(  rt)        |d      }n:|d   dk(  rt)        |d      }n%|d   dk(  rt)        |d      }n	 t)        ||d         }t-        |      dk\  st/        |d         }||   } dd d k(  rt)        |d      }n|dk(  r|j1                  |      }	 |j2                  |j2                  k(  s"J d!|j2                   d"|j2                   d#       	 t        j                  d$|
        t9        j:                  |      |_         | S # t        $ r t        j                  d        w xY w# t*        $ r+ t        j                  ddj%                  |
              Y w xY w# t4        $ r1}|xj6                  |j2                  |j2                  fz  c_         d}~ww xY w)%z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape 	ffn_layerffnFakeLayerNorm	LayerNormextra_output_weightszdense/kernelbert
mobilebert/c              3   $   K   | ]  }|d v  
 yw))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/mobilebert/modeling_mobilebert.py	<genexpr>z0load_tf_weights_in_mobilebert.<locals>.<genexpr>V   s      
 nn
s   z	Skipping z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabiasoutput_weightssquad
classifier   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipreplacesplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr;   nptftf_path	init_varsnamesarraysnamerT   arraypointerm_namescope_namesnumes                     r.   load_tf_weights_in_mobilebertrj   5   sh   
 ggoo01G
KK8	BC''0IEF  e(l5'BC&&w5Te	 5&) 1/e||K/||O[9||2NC||FL1zz#  

 
 KK)CHHTN#345 	'F||,f5 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62Q#33!'84Q7*!'<8%g{1~>G ;1$+a.)!#,+	', #$<=(gx0GxLL'E	==EKK/  /@[Y/ 	078''.c1/d LI  Q	
 	b & KK)CHHTN+; <=  	FFw}}ekk22F	s5   K K6 ;L- K360L*)L*-	M'6,M""M'c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )NoNormc                     t         |           t        j                  t	        j
                  |            | _        t        j                  t	        j                  |            | _        y N)	super__init__r   	ParameterrW   zerosr5   onesr2   )self	feat_sizeeps	__class__s      r.   rp   zNoNorm.__init__   s@    LLY!78	ll5::i#89    input_tensorreturnc                 :    || j                   z  | j                  z   S rn   )r2   r5   )rt   ry   s     r.   forwardzNoNorm.forward   s    dkk)DII55rx   rn   __name__
__module____qualname__rp   rW   Tensorr|   __classcell__rw   s   @r.   rl   rl      s#    :
6ELL 6U\\ 6rx   rl   )
layer_normno_normc                        e Zd ZdZ fdZ	 	 	 	 d	deej                     deej                     deej                     deej                     dej                  f
dZ
 xZS )
MobileBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 X   t         |           |j                  | _        |j                  | _        |j                  | _        t        j                  |j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        | j                  rdnd}| j                  |z  }t        j                  ||j                        | _        t!        |j"                     |j                        | _        t        j&                  |j(                        | _        | j-                  dt/        j0                  |j                        j3                  d      d       y )N)padding_idxr
   r   position_ids)r   F)
persistent)ro   rp   trigram_inputembedding_sizehidden_sizer   	Embedding
vocab_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsLinearembedding_transformationNORM2FNnormalization_typer    Dropouthidden_dropout_probdropoutregister_bufferrW   arangeexpand)rt   r[   embed_dim_multiplierembedded_input_sizerw   s       r.   rp   zMobileBertEmbeddings.__init__   sF   #11$33!--!||F,=,=v?T?Tbhbubuv#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"$($6$6qA"114HH(*		2EvGYGY(Z% !:!:;F<N<NOzz&"<"<= 	ELL)G)GHOOPWXej 	 	
rx   	input_idstoken_type_idsr   inputs_embedsrz   c           
      $   ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|:t        j                  |t        j                  | j                  j
                        }|| j                  |      }| j                  rpt        j                  t        j                  j                  |d d dd f   g dd      |t        j                  j                  |d d d df   g dd      gd	      }| j                  s| j                  | j                  k7  r| j                  |      }| j                  |      }| j!                  |      }||z   |z   }	| j#                  |	      }	| j%                  |	      }	|	S )
Nr   r   dtypedevice)r   r   r   r   r   r           )value)r   r   r   r   r   r   r9   dim)sizer   rW   rr   longr   r   r   catr   
functionalpadr   r   r   r   r   r    r   )
rt   r   r   r   r   input_shape
seq_lengthr   r   
embeddingss
             r.   r|   zMobileBertEmbeddings.forward   s     #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M "IIMM%%mAqrE&:<NVY%Z!MM%%mAssF&;=OWZ%[
 M !4!48H8H!H 99-HM #66|D $ : :> J"%88;PP
^^J/
\\*-
rx   )NNNN)r~   r   r   __doc__rp   r   rW   
LongTensorFloatTensorr   r|   r   r   s   @r.   r   r      s~    Q
0 155937590E,,-0 !!1!120 u//0	0
   1 120 
0rx   r   c                        e Zd Z fdZ	 	 	 d
dej
                  dej
                  dej
                  deej                     deej                     dee   de	ej
                     fd	Z
 xZS )MobileBertSelfAttentionc                 `   t         |           |j                  | _        t        |j                  |j                  z        | _        | j                  | j
                  z  | _        t        j                  |j                  | j                        | _	        t        j                  |j                  | j                        | _
        t        j                  |j                  r|j                  n|j                  | j                        | _        t        j                  |j                        | _        y rn   )ro   rp   num_attention_headsrR   true_hidden_sizeattention_head_sizeall_head_sizer   r   querykeyuse_bottleneck_attentionr   r   r   attention_probs_dropout_probr   rt   r[   rw   s     r.   rp   z MobileBertSelfAttention.__init__   s    #)#=#= #&v'>'>A[A['[#\ !558P8PPYYv668J8JK
99V44d6H6HIYY'-'F'FF##FL^L^`d`r`r

 zz&"E"EFrx   query_tensor
key_tensorvalue_tensorattention_mask	head_maskoutput_attentionsrz   c                    |j                   \  }}}	| j                  |      j                  |d| j                  | j                        j                  dd      }
| j                  |      j                  |d| j                  | j                        j                  dd      }| j                  |      j                  |d| j                  | j                        j                  dd      }t        j                  |
|j                  dd            }|t        j                  | j                        z  }|||z   }t        j                  j                  |d      }| j                  |      }|||z  }t        j                  ||      }|j!                  dddd      j#                         }|j%                         d d | j&                  fz   }|j                  |      }|r||f}|S |f}|S )Nr   r   r9   r   r   r
   )rT   r   viewr   r   rS   r   r   rW   matmulmathsqrtr   r   softmaxr   permute
contiguousr   r   )rt   r   r   r   r   r   r   
batch_sizer   _query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                     r.   r|   zMobileBertSelfAttention.forward   s    %1$6$6!
JJJ|$T*b$":":D<T<TUYq!_ 	 HHZ T*b$":":D<T<TUYq!_ 	 JJ|$T*b$":":D<T<TUYq!_ 	 !<<Y5H5HR5PQ+dii8P8P.QQ%/.@--//0@b/I ,,7 -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2 O\M]rx   NNNr~   r   r   rp   rW   r   r   r   booltupler|   r   r   s   @r.   r   r      s    G$ 7;15,0-ll- LL- ll	-
 !!2!23- E--.- $D>- 
u||	-rx   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MobileBertSelfOutputc                 j   t         |           |j                  | _        t        j                  |j
                  |j
                        | _        t        |j                     |j
                  |j                        | _
        | j                  s%t        j                  |j                        | _        y y Nrv   )ro   rp   use_bottleneckr   r   r   denser   r   layer_norm_epsr    r   r   r   r   s     r.   rp   zMobileBertSelfOutput.__init__  s    $33YYv668O8OP
 !:!:;F<S<SY_YnYno""::f&@&@ADL #rx   hidden_statesresidual_tensorrz   c                     | j                  |      }| j                  s| j                  |      }| j                  ||z         }|S rn   )r   r   r   r    rt   r   r   layer_outputss       r.   r|   zMobileBertSelfOutput.forward#  s@    

=1"" LL7M}'FGrx   r}   r   s   @r.   r   r     s2    BU\\ ELL UZUaUa rx   r   c                        e Zd Z fdZd Z	 	 	 ddej                  dej                  dej                  dej                  deej                     deej                     d	ee	   d
e
ej                     fdZ xZS )MobileBertAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y rn   )ro   rp   r   rt   r   outputsetpruned_headsr   s     r.   rp   zMobileBertAttention.__init__,  s0    +F3	*62Erx   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )rQ   r   rt   r   r   r   r   r   r   r   r   r   r   union)rt   headsindexs      r.   prune_headszMobileBertAttention.prune_heads2  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rx   r   r   r   layer_inputr   r   r   rz   c                 n    | j                  ||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )Nr   r   )rt   r   )rt   r   r   r   r   r   r   r   self_outputsattention_outputr   s              r.   r|   zMobileBertAttention.forwardD  sT     yy
  ;;|AD#%QR(88rx   r   )r~   r   r   rp   r   rW   r   r   r   r   r   r|   r   r   s   @r.   r   r   +  s    ";0 7;15,0ll LL ll	
 \\ !!2!23 E--. $D> 
u||	rx   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rn   )ro   rp   r   r   r   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr   s     r.   rp   zMobileBertIntermediate.__init__^  s]    YYv668P8PQ
f''-'-f.?.?'@D$'-'8'8D$rx   r   rz   c                 J    | j                  |      }| j                  |      }|S rn   )r   r  rt   r   s     r.   r|   zMobileBertIntermediate.forwardf  s&    

=100?rx   r}   r   s   @r.   r   r   ]  s#    9U\\ ell rx   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )OutputBottleneckc                 .   t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        t        j                  |j                        | _        y r   )ro   rp   r   r   r   r   r   r   r   r   r    r   r   r   r   s     r.   rp   zOutputBottleneck.__init__m  sh    YYv668J8JK
 !:!:;F<N<NTZTiTijzz&"<"<=rx   r   r   rz   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rn   )r   r   r    r   s       r.   r|   zOutputBottleneck.forwards  s7    

=1]3}'FGrx   r}   r   s   @r.   r
  r
  l  s1    >U\\ ELL UZUaUa rx   r
  c                        e Zd Z fdZdej
                  dej
                  dej
                  dej
                  fdZ xZS )MobileBertOutputc                 r   t         |           |j                  | _        t        j                  |j
                  |j                        | _        t        |j                     |j                        | _
        | j                  s%t        j                  |j                        | _        y t        |      | _        y rn   )ro   rp   r   r   r   r  r   r   r   r   r    r   r   r   r
  
bottleneckr   s     r.   rp   zMobileBertOutput.__init__{  s    $33YYv779P9PQ
 !:!:;F<S<ST""::f&@&@ADL.v6DOrx   intermediate_statesresidual_tensor_1residual_tensor_2rz   c                     | j                  |      }| j                  s'| j                  |      }| j                  ||z         }|S | j                  ||z         }| j	                  ||      }|S rn   )r   r   r   r    r  )rt   r  r  r  layer_outputs        r.   r|   zMobileBertOutput.forward  ss     zz"56""<<5L>>,9J*JKL   >>,9J*JKL??<9JKLrx   r}   r   s   @r.   r  r  z  s?    7
#(<<
DILL
ejeqeq
	
rx   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BottleneckLayerc                     t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        y r   )ro   rp   r   r   r   intra_bottleneck_sizer   r   r   r   r    r   s     r.   rp   zBottleneckLayer.__init__  sR    YYv1163O3OP
 !:!:;F<X<X^d^s^strx   r   rz   c                 J    | j                  |      }| j                  |      }|S rn   r   r    )rt   r   r   s      r.   r|   zBottleneckLayer.forward  s$    jj/nn[1rx   r}   r   s   @r.   r  r    s$    u
U\\ ell rx   r  c                   \     e Zd Z fdZdej
                  deej
                     fdZ xZS )
Bottleneckc                     t         |           |j                  | _        |j                  | _        t	        |      | _        | j                  rt	        |      | _        y y rn   )ro   rp   key_query_shared_bottleneckr   r  input	attentionr   s     r.   rp   zBottleneck.__init__  sP    +1+M+M((.(G(G%$V,
++,V4DN ,rx   r   rz   c                     | j                  |      }| j                  r|fdz  S | j                  r| j                  |      }||||fS ||||fS )N   )r   r   r  r!  )rt   r   bottlenecked_hidden_statesshared_attention_inputs       r.   r|   zBottleneck.forward  sc    " &*ZZ%>"((.0144--%)^^M%B"*,BMSmnn!=-A[\\rx   	r~   r   r   rp   rW   r   r   r|   r   r   s   @r.   r  r    s+    5]U\\ ]eELL6I ]rx   r  c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )	FFNOutputc                     t         |           t        j                  |j                  |j
                        | _        t        |j                     |j
                  |j                        | _
        y r   )ro   rp   r   r   r  r   r   r   r   r   r    r   s     r.   rp   zFFNOutput.__init__  sR    YYv779P9PQ
 !:!:;F<S<SY_YnYnorx   r   r   rz   c                 P    | j                  |      }| j                  ||z         }|S rn   r  r   s       r.   r|   zFFNOutput.forward  s)    

=1}'FGrx   r}   r   s   @r.   r(  r(    s2    p
U\\ ELL UZUaUa rx   r(  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )FFNLayerc                 b    t         |           t        |      | _        t	        |      | _        y rn   )ro   rp   r   intermediater(  r   r   s     r.   rp   zFFNLayer.__init__  s'    26:'rx   r   rz   c                 L    | j                  |      }| j                  ||      }|S rn   )r.  r   )rt   r   intermediate_outputr   s       r.   r|   zFFNLayer.forward  s*    "//>$7Grx   r}   r   s   @r.   r,  r,    s#    (
U\\ ell rx   r,  c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	MobileBertLayerc                    t         |           |j                  | _        |j                  | _        t	        |      | _        t        |      | _        t        |      | _	        | j                  rt        |      | _        |j                  dkD  rHt        j                  t        |j                  dz
        D cg c]  }t        |       c}      | _        y y c c}w Nr   )ro   rp   r   num_feedforward_networksr   r!  r   r.  r  r   r  r  r   
ModuleListranger,  r   rt   r[   r   rw   s      r.   rp   zMobileBertLayer.__init__  s    $33(.(G(G%,V426:&v.(0DO**Q.}}fFeFehiFi@j%k1hv&6%klDH /%ks   6Cr   r   r   r   rz   c           	         | j                   r| j                  |      \  }}}}n|gdz  \  }}}}| j                  |||||||      }	|	d   }
|
f}|	dd  }| j                  dk7  r+t	        | j
                        D ]  \  }} ||
      }
||
fz  } | j                  |
      }| j                  ||
|      }|f|z   t        j                  d      |||||
|fz   |z   }|S )Nr#  )r   r   r   i  )
r   r  r!  r5  	enumerater   r.  r   rW   tensor)rt   r   r   r   r   r   r   r   r   self_attention_outputsr   sr   i
ffn_moduler0  r  s                    r.   r|   zMobileBertLayer.forward  s>    BF//R_B`?L*lKCP/TUBU?L*lK!%/ "0 "
 2!4(,((A-!*488!4 ):#-.>#? &(() #//0@A{{#68H-XO T" #
  	 rx   r   r   r   s   @r.   r2  r2    sp    m  7;15,0.||. !!2!23. E--.	.
 $D>. 
u||	.rx   r2  c                        e Zd Z fdZ	 	 	 	 	 d
dej
                  deej                     deej                     dee   dee   dee   de	e
ef   fd	Z xZS )MobileBertEncoderc                     t         |           t        j                  t	        |j
                        D cg c]  }t        |       c}      | _        y c c}w rn   )ro   rp   r   r6  r7  num_hidden_layersr2  layerr8  s      r.   rp   zMobileBertEncoder.__init__  s<    ]]U6KcKcEd#eOF$;#ef
#es   Ar   r   r   r   output_hidden_statesreturn_dictrz   c                     |rdnd }|rdnd }t        | j                        D ],  \  }	}
|r||fz   } |
||||	   |      }|d   }|s$||d   fz   }. |r||fz   }|st        d |||fD              S t        |||      S )Nr+   r   r   c              3   &   K   | ]	  }||  y wrn   r+   )r,   vs     r.   r/   z,MobileBertEncoder.forward.<locals>.<genexpr>=  s     hqZ[Zghs   )last_hidden_stater   
attentions)r:  rD  r   r   )rt   r   r   r   r   rE  rF  all_hidden_statesall_attentionsr>  layer_moduler   s               r.   r|   zMobileBertEncoder.forward  s     #7BD0d(4 	FOA|#$58H$H!(!!	M *!,M !/=3C2E!E	F    1]4D Dh]4E~$Vhhh+;LYg
 	
rx   )NNFFT)r~   r   r   rp   rW   r   r   r   r   r   r   r   r|   r   r   s   @r.   rA  rA    s    g 7;15,1/4&*"
||"
 !!2!23"
 E--.	"

 $D>"
 'tn"
 d^"
 
uo%	&"
rx   rA  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertPoolerc                     t         |           |j                  | _        | j                  r0t	        j
                  |j                  |j                        | _        y y rn   )ro   rp   classifier_activationdo_activater   r   r   r   r   s     r.   rp   zMobileBertPooler.__init__D  sH    !776#5#5v7I7IJDJ rx   r   rz   c                     |d d df   }| j                   s|S | j                  |      }t        j                  |      }|S )Nr   )rS  r   rW   tanh)rt   r   first_token_tensorpooled_outputs       r.   r|   zMobileBertPooler.forwardJ  sE     +1a40%% JJ'9:M!JJ}5M  rx   r}   r   s   @r.   rP  rP  C  s$    K	!U\\ 	!ell 	!rx   rP  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )!MobileBertPredictionHeadTransformc                 Z   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        d   |j                  |j                        | _        y )Nr   r   )ro   rp   r   r   r   r   r  r  r  r   transform_act_fnr   r   r    r   s     r.   rp   z*MobileBertPredictionHeadTransform.__init__W  s|    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D! .v/A/AvG\G\]rx   r   rz   c                 l    | j                  |      }| j                  |      }| j                  |      }|S rn   )r   r[  r    r  s     r.   r|   z)MobileBertPredictionHeadTransform.forward`  s4    

=1--m<}5rx   r}   r   s   @r.   rY  rY  V  s$    ^U\\ ell rx   rY  c                   ^     e Zd Z fdZddZdej                  dej                  fdZ xZS )MobileBertLMPredictionHeadc                    t         |           t        |      | _        t	        j
                  |j                  |j                  |j                  z
  d      | _	        t	        j
                  |j                  |j                  d      | _
        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)r5   )ro   rp   rY  	transformr   r   r   r   r   r   decoderrq   rW   rr   r5   r   s     r.   rp   z#MobileBertLMPredictionHead.__init__h  s    :6B YYv00&2D2DvG\G\2\chi
yy!6!68I8IPUVLLV->->!?@	 IIrx   rz   c                 :    | j                   | j                  _         y rn   )r5   ra  rt   s    r.   _tie_weightsz'MobileBertLMPredictionHead._tie_weightss  s     IIrx   r   c                    | j                  |      }|j                  t        j                  | j                  j
                  j                         | j                  j
                  gd            }|| j                  j                  z  }|S )Nr   r   )	r`  r   rW   r   ra  r2   tr   r5   r  s     r.   r|   z"MobileBertLMPredictionHead.forwardv  sk    }5%,,UYY8K8K8M8M8OQUQ[Q[QbQb7cij-kl***rx   )rz   N)	r~   r   r   rp   rd  rW   r   r|   r   r   s   @r.   r^  r^  g  s(    	&&U\\ ell rx   r^  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertOnlyMLMHeadc                 B    t         |           t        |      | _        y rn   )ro   rp   r^  predictionsr   s     r.   rp   zMobileBertOnlyMLMHead.__init__~  s    5f=rx   sequence_outputrz   c                 (    | j                  |      }|S rn   )rj  )rt   rk  prediction_scoress      r.   r|   zMobileBertOnlyMLMHead.forward  s     ,,_=  rx   r}   r   s   @r.   rh  rh  }  s#    >!u|| ! !rx   rh  c                   t     e Zd Z fdZdej
                  dej
                  deej
                     fdZ xZS )MobileBertPreTrainingHeadsc                     t         |           t        |      | _        t	        j
                  |j                  d      | _        y Nr9   )ro   rp   r^  rj  r   r   r   seq_relationshipr   s     r.   rp   z#MobileBertPreTrainingHeads.__init__  s4    5f= "		&*<*<a @rx   rk  rW  rz   c                 N    | j                  |      }| j                  |      }||fS rn   )rj  rr  )rt   rk  rW  rm  seq_relationship_scores        r.   r|   z"MobileBertPreTrainingHeads.forward  s0     ,,_=!%!6!6}!E "888rx   r&  r   s   @r.   ro  ro    s8    A
9u|| 9ELL 9UZ[`[g[gUh 9rx   ro  c                   &    e Zd ZU eed<   eZdZd Zy)MobileBertPreTrainedModelr[   r#   c                 x   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                  t        f      rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r%|j                  j                  j                          yy)zInitialize the weightsr   )meanstdNg      ?)r  r   r   r2   rY   normal_r[   initializer_ranger5   zero_r   r   r    rl   fill_r^  )rt   modules     r.   _init_weightsz'MobileBertPreTrainedModel._init_weights  s,   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .v 67KK""$MM$$S) :;KK""$ <rx   N)	r~   r   r   r   __annotations__rj   load_tf_weightsbase_model_prefixr  r+   rx   r.   rv  rv    s    3O$%rx   rv  z6
    Output type of [`MobileBertForPreTraining`].
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)MobileBertForPreTrainingOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    Nlossprediction_logitsseq_relationship_logitsr   rK  )r~   r   r   r   r  r   rW   r   r  r  r  r   r   rK  r+   rx   r.   r  r    s~    	 )-D(5$$
%,59x 1 129;?Xe&7&78?8<M8E%"3"345<59Ju00129rx   r  c                   <    e Zd ZdZd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     d	e	e
j                     d
e	e
j                     de	e
j                     de	e   de	e   de	e   deeef   fd       Z xZS )MobileBertModelz2
    https://huggingface.co/papers/2004.02984
    c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
ro   rp   r[   r   r   rA  encoderrP  pooler	post_init)rt   r[   add_pooling_layerrw   s      r.   rp   zMobileBertModel.__init__  sN    
 	 .v6(02C&v. 	rx   c                 .    | j                   j                  S rn   r   r   rc  s    r.   get_input_embeddingsz$MobileBertModel.get_input_embeddings  s    ...rx   c                 &    || j                   _        y rn   r  )rt   r   s     r.   set_input_embeddingsz$MobileBertModel.set_input_embeddings  s    */'rx   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  rD  r!  r   )rt   heads_to_prunerD  r   s       r.   _prune_headszMobileBertModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Crx   r   r   r   r   r   r   rE  r   rF  rz   c
                 l   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      ||j                  n|j                  }|t        j                  |
|      }|&t        j                  |
t        j                  |      }| j                  ||
      }| j                  || j                   j                        }| j                  ||||      }| j!                  ||||||	      }|d   }| j"                  | j#                  |      nd }|	s
||f|d	d  z   S t%        |||j&                  |j(                  
      S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)r   r   )r   r   r   r   )r   r   r   rE  rF  r   r   )rJ  pooler_outputr   rK  )r[   r   rE  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rW   rs   rr   r   get_extended_attention_maskget_head_maskrC  r   r  r  r   r   rK  )rt   r   r   r   r   r   r   rE  r   rF  r   r   extended_attention_maskembedding_outputencoder_outputsrk  rW  s                    r.   r|   zMobileBertModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN 150P0PQ_al0m &&y$++2O2OP	??l>iv + 
 ,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
rx   )T)	NNNNNNNNN)r~   r   r   r   rp   r  r  r  r   r   rW   r   r   r   r   r   r   r|   r   r   s   @r.   r  r    s   /0C  156:59371559/3,0&*D
E,,-D
 !!2!23D
 !!1!12	D

 u//0D
 E--.D
   1 12D
 'tnD
 $D>D
 d^D
 
u00	1D
 D
rx   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                       e Zd ZddgZ fdZd Zd Zddee   de	j                  f fdZe	 	 	 	 	 	 	 	 	 	 	 dd	eej                     d
eej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deej                     deeef   fd       Z xZS )MobileBertForPreTrainingcls.predictions.decoder.weightcls.predictions.decoder.biasc                     t         |   |       t        |      | _        t	        |      | _        | j                          y rn   )ro   rp   r  r#   ro  clsr  r   s     r.   rp   z!MobileBertForPreTraining.__init__9  s4     )&1-f5 	rx   c                 B    | j                   j                  j                  S rn   r  rj  ra  rc  s    r.   get_output_embeddingsz.MobileBertForPreTraining.get_output_embeddingsA      xx##+++rx   c                     || j                   j                  _        |j                  | j                   j                  _        y rn   r  rj  ra  r5   rt   new_embeddingss     r.   set_output_embeddingsz.MobileBertForPreTraining.set_output_embeddingsD  ,    '5$$2$7$7!rx   new_num_tokensrz   c                     | j                  | j                  j                  j                  |d      | j                  j                  _        t        |   |      S NT)r  
transposed)r  _get_resized_lm_headr  rj  r   ro   resize_token_embeddingsrt   r  rw   s     r.   r  z0MobileBertForPreTraining.resize_token_embeddingsH  sR    %)%>%>HH  &&~RV &? &
" w.n.MMrx   r   r   r   r   r   r   labelsnext_sentence_labelr   rE  rF  c                 
   ||n| j                   j                  }| j                  |||||||	|
|	      }|dd \  }}| j                  ||      \  }}d}|u|st	               } ||j                  d| j                   j                        |j                  d            } ||j                  dd      |j                  d            }||z   }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, MobileBertForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = MobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")

        >>> input_ids = torch.tensor(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True)).unsqueeze(0)
        >>> # Batch size 1
        >>> outputs = model(input_ids)

        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```Nr   r   r   r   r   r   rE  rF  r9   r   )r  r  r  r   rK  )
r[   r  r#   r  r   r   r   r  r   rK  )rt   r   r   r   r   r   r   r  r  r   rE  rF  r   rk  rW  rm  rt  
total_lossloss_fctmasked_lm_lossnext_sentence_lossr   s                         r.   r|   z MobileBertForPreTraining.forwardP  sG   V &1%<k$++B]B]//))%'/!5# " 

 *1!&48HH_m4\11
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J')?@712;NF/9/EZMF*Q6Q-/$:!//))
 	
rx   rn   NNNNNNNNNNN)r~   r   r   _tied_weights_keysrp   r  r  r   rR   r   r   r  r   rW   r   r   r   r   r  r|   r   r   s   @r.   r  r  0  s}    ;<Z[,8Nhsm Nr|| N  156:59371559-1:>9=<@37K
E,,-K
 !!2!23K
 !!1!12	K

 u//0K
 E--.K
   1 12K
 ))*K
 &e&6&67K
 $E$5$56K
 'u'8'89K
 e//0K
 
u44	5K
 K
rx   r  c                       e Zd ZddgZ fdZd Zd Zddee   de	j                  f fdZe	 	 	 	 	 	 	 	 	 	 dd	eej                     d
eej                     deej                     deej                     deej                     deej                     deej                     dee   dee   dee   deeef   fd       Z xZS )MobileBertForMaskedLMr  r  c                     t         |   |       t        |d      | _        t	        |      | _        || _        | j                          y NF)r  )ro   rp   r  r#   rh  r  r[   r  r   s     r.   rp   zMobileBertForMaskedLM.__init__  s=     )&EJ(0 	rx   c                 B    | j                   j                  j                  S rn   r  rc  s    r.   r  z+MobileBertForMaskedLM.get_output_embeddings  r  rx   c                     || j                   j                  _        |j                  | j                   j                  _        y rn   r  r  s     r.   r  z+MobileBertForMaskedLM.set_output_embeddings  r  rx   r  rz   c                     | j                  | j                  j                  j                  |d      | j                  j                  _        t        |   |      S r  r  r  s     r.   r  z-MobileBertForMaskedLM.resize_token_embeddings  sR    %)%>%>HH  &&~RV &? &
" w.n.MMrx   r   r   r   r   r   r   r  r   rE  rF  c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr  r   r   r9   r  logitsr   rK  )
r[   r  r#   r  r   r   r   r   r   rK  )rt   r   r   r   r   r   r   r  r   rE  rF  r   rk  rm  r  r  r   s                    r.   r|   zMobileBertForMaskedLM.forward  s    ( &1%<k$++B]B]//))%'/!5# " 

 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
rx   rn   
NNNNNNNNNN)r~   r   r   r  rp   r  r  r   rR   r   r   r  r   rW   r   r   r   r   r   r   r|   r   r   s   @r.   r  r    s?   :<Z[,8Nhsm Nr|| N  156:59371559-1,0/3&*2
E,,-2
 !!2!232
 !!1!12	2

 u//02
 E--.2
   1 122
 ))*2
 $D>2
 'tn2
 d^2
 
un$	%2
 2
rx   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MobileBertOnlyNSPHeadc                 l    t         |           t        j                  |j                  d      | _        y rq  )ro   rp   r   r   r   rr  r   s     r.   rp   zMobileBertOnlyNSPHead.__init__  s'     "		&*<*<a @rx   rW  rz   c                 (    | j                  |      }|S rn   )rr  )rt   rW  rt  s      r.   r|   zMobileBertOnlyNSPHead.forward  s    !%!6!6}!E%%rx   r}   r   s   @r.   r  r    s$    A&U\\ &ell &rx   r  zZ
    MobileBert Model with a `next sentence prediction (classification)` head on top.
    c                   D    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee	   d
ee	   dee	   de
eef   fd       Z xZS )#MobileBertForNextSentencePredictionc                     t         |   |       t        |      | _        t	        |      | _        | j                          y rn   )ro   rp   r  r#   r  r  r  r   s     r.   rp   z,MobileBertForNextSentencePrediction.__init__   s4     )&1(0 	rx   r   r   r   r   r   r   r  r   rE  rF  rz   c                    d|v r+t        j                  dt               |j                  d      }|
|
n| j                  j
                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }d}|2t               } ||j                  dd      |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )	a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`.

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, MobileBertForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = MobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")

        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```r  zoThe `next_sentence_label` argument is deprecated and will be removed in a future version, use `labels` instead.Nr  r   r   r9   r  )warningswarnFutureWarningpopr[   r  r#   r  r   r   r   r   rK  )rt   r   r   r   r   r   r   r  r   rE  rF  kwargsr   rW  rt  r  r  r   s                     r.   r|   z+MobileBertForNextSentencePrediction.forward	  s   R !F*MM%
 ZZ 56F%0%<k$++B]B]//))%'/!5# " 

  
!%-!8!')H!)*@*E*Eb!*LfkkZ\o!^,.<F7I7U')F2a[aa*#)!//))	
 	
rx   r  )r~   r   r   rp   r   r   rW   r   r   r   r   r   r   r|   r   r   s   @r.   r  r    s     156:59371559-1,0/3&*O
E,,-O
 !!2!23O
 !!1!12	O

 u//0O
 E--.O
   1 12O
 ))*O
 $D>O
 'tnO
 d^O
 
u11	2O
 O
rx   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS )#MobileBertForSequenceClassificationc                 n   t         |   |       |j                  | _        || _        t	        |      | _        |j                  |j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y rn   )ro   rp   
num_labelsr[   r  r#   classifier_dropoutr   r   r   r   r   r   r8   r  rt   r[   r  rw   s      r.   rp   z,MobileBertForSequenceClassification.__init__d  s      ++)&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rx   r   r   r   r   r   r   r  r   rE  rF  rz   c                 @   |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }|
s|f|dd z   }||f|z   S |S t!        |||j"                  |j$                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   
regressionsingle_label_classificationmulti_label_classificationr   r9   r  )r[   r  r#   r   r8   problem_typer  r   rW   r   rR   r	   squeezer   r   r   r   r   rK  )rt   r   r   r   r   r   r   r  r   rE  rF  r   rW  r  r  r  r   s                    r.   r|   z+MobileBertForSequenceClassification.forwards  s   ( &1%<k$++B]B]//))%'/!5# " 

  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rx   r  )r~   r   r   rp   r   r   rW   r   r   r   r   r   r|   r   r   s   @r.   r  r  \  s     -11515/3,004)-,0/3&*E
ELL)E
 !.E
 !.	E

 u||,E
 ELL)E
  -E
 &E
 $D>E
 'tnE
 d^E
 
uU\\"$<<	=E
 E
rx   r  c                   ~    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee   dee   dee   de	e
ej                     ef   fd       Z xZS )MobileBertForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r  )
ro   rp   r  r  r#   r   r   r   
qa_outputsr  r   s     r.   rp   z'MobileBertForQuestionAnswering.__init__  sU      ++)&EJ))F$6$68I8IJ 	rx   r   r   r   r   r   r   start_positionsend_positionsr   rE  rF  rz   c                 (   ||n| j                   j                  }| j                  |||||||	|
|	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|s||f|dd  z   }||f|z   S |S t        ||||j                  |j                        S )	Nr  r   r   r   r   )ignore_indexr9   )r  start_logits
end_logitsr   rK  )r[   r  r#   r  rK   r  r   rQ   r   clampr   r   r   rK  )rt   r   r   r   r   r   r   r  r  r   rE  rF  r   rk  r  r  r  r  ignored_indexr  
start_lossend_lossr   s                          r.   r|   z&MobileBertForQuestionAnswering.forward  s    &1%<k$++B]B]//))%'/!5# " 

 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rx   r  )r~   r   r   rp   r   r   rW   r   r   r   r   r   r|   r   r   s   @r.   r  r    s     -11515/3,0042604,0/3&*>
ELL)>
 !.>
 !.	>

 u||,>
 ELL)>
  ->
 "%,,/>
  ->
 $D>>
 'tn>
 d^>
 
uU\\"$@@	A>
 >
rx   r  c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS )MobileBertForMultipleChoicec                 *   t         |   |       t        |      | _        |j                  |j                  n|j
                  }t        j                  |      | _        t        j                  |j                  d      | _        | j                          y r4  )ro   rp   r  r#   r  r   r   r   r   r   r   r8   r  r  s      r.   rp   z$MobileBertForMultipleChoice.__init__  su     )&1)/)B)B)NF%%TZTnTn 	 zz"45))F$6$6: 	rx   r   r   r   r   r   r   r  r   rE  rF  rz   c                 L   |
|
n| j                   j                  }
||j                  d   n|j                  d   }|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||||	|
	      }|d   }| j                  |      }| j                  |      }|j                  d|      }d}|t               } |||      }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   r   r  r9   r  )r[   r  rT   r   r   r#   r   r8   r   r   r   rK  )rt   r   r   r   r   r   r   r  r   rE  rF  num_choicesr   rW  r  reshaped_logitsr  r  r   s                      r.   r|   z#MobileBertForMultipleChoice.forward  s   X &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 //))%'/!5# " 

  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
rx   r  )r~   r   r   rp   r   r   rW   r   r   r   r   r   r|   r   r   s   @r.   r  r    s     -11515/3,004)-,0/3&*X
ELL)X
 !.X
 !.	X

 u||,X
 ELL)X
  -X
 &X
 $D>X
 'tnX
 d^X
 
uU\\"$==	>X
 X
rx   r  c                   ^    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	ee   d
ee   dee   de	e
ej                     ef   fd       Z xZS ) MobileBertForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y r  )ro   rp   r  r  r#   r  r   r   r   r   r   r   r8   r  r  s      r.   rp   z)MobileBertForTokenClassification.__init__z  s      ++)&EJ)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rx   r   r   r   r   r   r   r  r   rE  rF  rz   c                    |
|
n| j                   j                  }
| j                  ||||||||	|
	      }|d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r9   r  )r[   r  r#   r   r8   r   r   r  r   r   rK  )rt   r   r   r   r   r   r   r  r   rE  rF  r   rk  r  r  r  r   s                    r.   r|   z(MobileBertForTokenClassification.forward  s    $ &1%<k$++B]B]//))%'/!5# " 

 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
rx   r  )r~   r   r   rp   r   r   rW   r   r   r   r   r   r|   r   r   s   @r.   r  r  w  s     -11515/3,004)-,0/3&*2
ELL)2
 !.2
 !.	2

 u||,2
 ELL)2
  -2
 &2
 $D>2
 'tn2
 d^2
 
uU\\"$99	:2
 2
rx   r  )r  r  r  r  r  r  r  r2  r  rv  rj   )Jr   rA   r  dataclassesr   typingr   r   rW   r   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   configuration_mobilebertr   
get_loggerr~   r?   rj   Modulerl   r    r   r   r   r   r   r   r
  r  r  r  r(  r,  r2  rA  rP  rY  r^  rh  ro  rv  r  r  r  r  r  r  r  r  r  r  __all__r+   rx   r.   <module>r     s'  .  	  ! "   A A !	 	 	 . Q 9 9 6 
		H	%K\6RYY 6 &
9I299 IX;bii ;|299 "/")) /dRYY ryy ryy 0	bii 	!] !]H			 		ryy 	<bii <~'
		 '
T!ryy !&		 " ,!BII !	9 	9 % % %0 
:[ : :& g
/ g
 g
T f
8 f
f
R M
5 M
 M
`&BII & 
Z
*C Z

Z
z V
*C V
V
r J
%> J
 J
Z g
"; g
 g
T B
'@ B
 B
Jrx   