
    rh                        d dl Z d dlmZmZ d dlmZ d dlZd dlm	Z
 d dl	Zd dlmZmZmZ d dlmZmZ d dlmZ ddlmZmZmZmZmZmZ ddlmZmZmZmZ dd	l m!Z!m"Z"m#Z# d
dl$m%Z%  e#jL                  e'      Z(dZ)dZ*dZ+dZ,d Z-d Z. G d dej^                        Z0 G d dej^                        Z1 G d dej^                        Z2 G d dej^                        Z3 G d dej^                        Z4 G d dej^                        Z5 G d dej^                        Z6 G d  d!e      Z7 G d" d#ej^                        Z8 e!d$e+       G d% d&e7             Z9 ee9e)de*        G d' d(ej^                        Z: e!d)e+       G d* d+e7             Z; ee;e)ee*        G d, d-ej^                        Z< e!d.e+       G d/ d0e7             Z= ee=e)ee*        G d1 d2ej^                        Z> e!d3e+       G d4 d5e7             Z? ee?e,j                  d6              ee?e)ee*        G d7 d8ej^                        ZA e!d9e+       G d: d;e7             ZB eeBe)ee*        G d< d=ej^                        ZC e!d>e+       G d? d@e7             ZD eeDe)ee*       g dAZEy)B    N)CallableOptional)
FrozenDictfreezeunfreeze)flatten_dictunflatten_dict)lax   )FlaxBaseModelOutputFlaxMaskedLMOutputFlaxMultipleChoiceModelOutput FlaxQuestionAnsweringModelOutputFlaxSequenceClassifierOutputFlaxTokenClassifierOutput)ACT2FNFlaxPreTrainedModelappend_call_sample_docstringoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forwardlogging   )DistilBertConfigzdistilbert-base-uncasedr   a  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`DistilBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`numpy.ndarray` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`numpy.ndarray` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                 v    dt        j                  dd|dz  z  t        j                  |      z        z  }| |z  S )Nr   i'     )nppowerfloat32)posid_modelangle_ratess       /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/distilbert/modeling_flax_distilbert.py
get_anglesr%   `   s8    bhhuqAF|rzz'7J&JKKK    c                    t        t        j                  |       d d t        j                  f   t        j                  |      t        j                  d d f   |      }t        j                  |d d dd df         |d d dd df<   t        j
                  |d d dd df         |d d dd df<   |t        j                  df   }t        j                  |      S )Nr   r   r   .)r%   r   arangenewaxissincosjnparray)positionr"   
angle_radspos_encodings       r$   positional_encodingr1   e   s    BIIh/2::>		'@RSUS]S]_`S`@acjkJ &&Aqt!tG!45Jq!$Q$w &&Aqt!tG!45Jq!$Q$wbjj#o.L99\""r&   c                   f    e Zd ZU dZeed<   ej                  Zej                  ed<   d Z	dde
fdZy)	FlaxEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.configdtypec                 R   t        j                  | j                  j                  | j                  j                  t
        j                   j                  j                  | j                  j                              | _	        | j                  j                  st        j                  | j                  j                  | j                  j                  t
        j                   j                  j                  | j                  j                              | _        n9t        | j                  j                  | j                  j                        | _        t        j                  d| j                         | _        t        j"                  | j                  j$                        | _        y )Nstddev)embedding_init-q=epsilonr5   rate)nnEmbedr4   
vocab_sizedimjaxinitializersnormalinitializer_rangeword_embeddingssinusoidal_pos_embdsmax_position_embeddingsposition_embeddingsr1   r0   	LayerNormr5   Dropoutdropoutselfs    r$   setupzFlaxEmbeddings.setupz   s   !xxKK""KKOO66..55T[[=Z=Z5[ 

 {{//')xx33"vv2299A^A^9_(D$ !4DKK4W4WY]YdYdYhYh iDe4::Fzzt{{':':;r&   deterministicc                    |j                   \  }}| j                  |j                  d            }| j                  j                  s^t        j                  |      j                  d      }t        j                  |||f      }| j                  |j                  d            }n3| j                  d d d |d d f   }|j                  |j                        }||z   }| j                  |      }| j                  ||      }|S )Ni4)shaperQ   )rT   rG   astyper4   rH   r,   r(   broadcast_torJ   r0   r5   rK   rM   )	rO   	input_idsrQ   
batch_size
seq_lengthinputs_embedsposition_idsposition_embedshidden_statess	            r$   __call__zFlaxEmbeddings.__call__   s    !*
J,,Y-=-=d-CD{{//::j188>L++LZ@XYL"66|7J7J47PQO"//;J;0ABO-44]5H5HIO &7 }5]-Pr&   NT)__name__
__module____qualname____doc__r   __annotations__r,   r   r5   rP   boolr_    r&   r$   r3   r3   t   s.    Q{{E399"<" r&   r3   c                   j    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 dde	de	fdZ
y)	FlaxMultiHeadSelfAttentionr4   r5   c                    | j                   j                  | _        | j                   j                  | _        t        j                  | j                   j
                        | _        | j                  | j                  z  dk(  s%t        d| j                   d| j                         t        j                  | j                  | j                  t        j                  j                  j                  | j                   j                              | _        t        j                  | j                  | j                  t        j                  j                  j                  | j                   j                              | _        t        j                  | j                  | j                  t        j                  j                  j                  | j                   j                              | _        t        j                  | j                  | j                  t        j                  j                  j                  | j                   j                              | _        y )Nr=   r   Hidden size " not dividable by number of heads r7   r5   kernel_init)r4   n_headsrB   r?   rL   attention_dropoutrM   
ValueErrorDenser5   rC   rD   rE   rF   q_link_linv_linout_linrN   s    r$   rP   z FlaxMultiHeadSelfAttention.setup   s   {{**;;??zzt{{'D'DE4<<'1,|DHH:5WX\XdXdWefggXXHH**++22$++:W:W2X


 XXHH**++22$++:W:W2X


 XXHH**++22$++:W:W2X


 xxHH**++22$++:W:W2X
r&   rQ   output_attentionsc           	          |j                   \  }}|j                   d   }	 j                   j                  z  dd|	f}
 fd} fd} | j                  |            } | j	                  |            } | j                  |            }|t        j                        z  }t        j                  ||j                  dddd            }t        j                  ||
      }|j                  |j                        }|dd|z
  z  z
  }t        j                  |d	
      } j!                  ||      }t        j                  ||      } ||      } j#                  |      }|r||fS |fS )Nr   c                 d    | j                  dj                        j                  dddd      S )zseparate headsr   r   r   r   )reshapero   	transposexbsdim_per_headrO   s    r$   rT   z2FlaxMultiHeadSelfAttention.__call__.<locals>.shape   s/    99RT\\<@JJ1aQRTUVVr&   c                 h    | j                  dddd      j                  dj                  z        S )zgroup headsr   r   r   r   rz   )r|   r{   ro   r}   s    r$   unshapez4FlaxMultiHeadSelfAttention.__call__.<locals>.unshape   s0    ;;q!Q*222r4<<,;VWWr&   r   r   r   gꌠ9Y>)Fg      ?rz   axisrU   )rT   rB   ro   rs   rt   ru   mathsqrtr,   matmulr|   r{   rV   r5   r?   softmaxrM   rv   )rO   querykeyvaluemaskrQ   rw   q_lenrB   k_len
mask_reshprT   r   qkvscoresweightscontextr   r   s   `                  @@r$   r_   z#FlaxMultiHeadSelfAttention.__call__   sP    E3		! xx4<</!Q&
	W	X $**U#$$**S/"$**U#$		,''Aq{{1aA67{{4,{{6<<($#*--**V"-,,wm,D**Wa('",,w'W%%:r&   N)TFra   rb   rc   r   re   r,   r   r5   rP   rf   r_   rg   r&   r$   ri   ri      sA    {{E399"
F #"'/ /  /r&   ri   c                   b    e Zd ZU eed<   ej                  Zej                  ed<   d Zdde	fdZ
y)FlaxFFNr4   r5   c                    t        j                  | j                  j                        | _        | j                  j                  | _        d| _        t        j                  | j                  j                  | j                  t        j                   j                  j                  | j                  j                              | _        t        j                  | j                  j                  | j                  t        j                   j                  j                  | j                  j                              | _        t         | j                  j"                     | _        y )Nr=   r   r7   rm   )r?   rL   r4   rM   chunk_size_feed_forwardseq_len_dimrr   
hidden_dimr5   rC   rD   rE   rF   lin1rB   lin2r   
activationrN   s    r$   rP   zFlaxFFN.setup   s    zzt{{':':;'+{{'J'J$HHKK""**++22$++:W:W2X
	
 HHKKOO**++22$++:W:W2X
	 !!7!78r&   rQ   c                     | j                  |      }| j                  |      }| j                  |      }| j                  ||      }|S )NrU   )r   r   r   rM   )rO   r^   rQ   s      r$   r_   zFlaxFFN.__call__	  sD    		-06		-0]-Pr&   Nr`   r   rg   r&   r$   r   r      s+    {{E399"9"T r&   r   c                   j    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 dde	de	fdZ
y)	FlaxTransformerBlockr4   r5   c                    | j                   j                  | j                   j                  z  dk(  s5J d| j                   j                   d| j                   j                          t        | j                   | j                        | _        t        j                  d| j                        | _        t        | j                   | j                        | _
        t        j                  d| j                        | _        y )Nr   rk   rl   r5   r:   r;   )r4   rB   ro   ri   r5   	attentionr?   rK   sa_layer_normr   ffnoutput_layer_normrN   s    r$   rP   zFlaxTransformerBlock.setup  s    {{!4!449 	
4;;??++MdkkNaNaMbc	
9 4DKKtzzR\\%tzzJ4;;djj9!#e4::!Nr&   rw   rQ   c                     | j                  ||||||      }|r|\  }}nt        |      t        u sJ |d   }| j                  ||z         }| j	                  ||      }| j                  ||z         }|f}|rf|z   }|S )N)r   r   r   r   rw   rQ   r   rU   )r   typetupler   r   r   )	rO   r^   	attn_maskrw   rQ   	sa_output
sa_weights
ffn_outputoutputs	            r$   r_   zFlaxTransformerBlock.__call__   s     NN/' # 
	 $-!Iz	?e+++!!I&&y='@A	 XXi}XE
++J,BC
 ]V+Fr&   N)FTr   rg   r&   r$   r   r     sA    {{E399"	O #("  	
 r&   r   c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)FlaxTransformerr4   r5   c           	          t        | j                  j                        D cg c]-  }t        | j                  t	        |      | j
                        / c}| _        y c c}w )N)namer5   )ranger4   n_layersr   strr5   layers)rO   r!   s     r$   rP   zFlaxTransformer.setupD  sF    V[\`\g\g\p\pVq
QR 3q6L
 
s   2Arw   output_hidden_statesrQ   return_dictc                 $   |rdnd }|rdnd }| j                   D ]I  }	|r||fz   } |	||||      }
|
d   }|rt        |
      dk(  sJ |
d   }||fz   }:t        |
      dk(  rIJ  |r||fz   }|st        d |||fD              S t        |||      S )	Nrg   )r^   r   rw   rQ   rz   r   r   r   c              3   &   K   | ]	  }||  y wNrg   ).0r   s     r$   	<genexpr>z+FlaxTransformer.__call__.<locals>.<genexpr>m  s     hqZ[Zghs   )last_hidden_stater^   
attentions)r   lenr   r   )rO   r^   attention_maskrw   r   rQ   r   all_hidden_statesall_attentionslayer_modulelayer_outputsr   s               r$   r_   zFlaxTransformer.__call__I  s     #7BD0d KK 	/L#$58H$H!(+("3+	M *"-M =)Q...*1-
!/:-!?=)Q...#	/(   1]4D Dh]NDU$Vhhh"+;LYg
 	
r&   NFFTFr   rg   r&   r$   r   r   @  sZ    {{E399"
 #(%*"!'
  	'

 #'
 '
 '
r&   r   c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)FlaxTransformerEncoderr4   r5   c                 P    t        | j                  | j                        | _        y Nr   )r   r4   r5   layerrN   s    r$   rP   zFlaxTransformerEncoder.setupw  s    $T[[

C
r&   rw   r   rQ   r   c                 0    | j                  ||||||      S )N)r^   r   rw   r   rQ   r   )r   )rO   r^   r   rw   r   rQ   r   s          r$   r_   zFlaxTransformerEncoder.__call__z  s,     zz')/!5'#  
 	
r&   Nr   r   rg   r&   r$   r   r   s  s[    {{E399"D #(%*"!
  	

 #
 
 
r&   r   c                       e Zd ZU eed<   ej                  Zej                  ed<   ej                  j                  j                  Zedej                  f   ed<   d Zd Zy)FlaxDistilBertLMDecoderr4   r5   .	bias_initc                 r    | j                  d| j                  | j                  j                  f      | _        y )Nbias)paramr   r4   rA   r   rN   s    r$   rP   zFlaxDistilBertLMDecoder.setup  s'    JJvt~~8N8N7PQ	r&   c                 6   t        j                  || j                        }t        j                  || j                        }t        j                  |||j
                  dz
  fdfdf      }t        j                  | j                  | j                        }||z   }|S )Nr   )r   )rg   rg   )r,   asarrayr5   r
   dot_generalndimr   )rO   inputskernelyr   s        r$   r_   z FlaxDistilBertLMDecoder.__call__  sw    VTZZ0VTZZ0OOFFv{{Q.@$-G,RS{{499djj1Hr&   N)ra   rb   rc   r   re   r,   r   r5   rC   r?   rD   zerosr   r   r   ndarrayrP   r_   rg   r&   r$   r   r     sL    {{E399"+.66+>+>+D+DIxRZZ(DRr&   r   c                   ~    e Zd ZU dZeZdZdZej                  e
d<   ddej                  dfded	ed
edej                  def
 fdZddej&                  j(                  d	ededefdZ eej3                  d            	 	 	 	 	 	 	 	 ddee   dej&                  j(                  dedee   dee   dee   fd       Z xZS )FlaxDistilBertPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
distilbertNmodule_class)r   r   r   Tr4   input_shapeseedr5   _do_initc                 Z     | j                   d||d|}t        | 	  ||||||       y )Nr4   r5   )r   r   r5   r   rg   )r   super__init__)	rO   r4   r   r   r5   r   kwargsmodule	__class__s	           r$   r   z&FlaxDistilBertPreTrainedModel.__init__  s=     #""H&HH[tSXcklr&   rngparamsreturnc                    t        j                  |d      }t        j                  |      }t        j                  j                  |      \  }}||d}| j                  j                  |||d      d   }	|dt        t        |	            }	t        t        |            }| j                  D ]
  }
|	|
   ||
<    t               | _
        t        t        |            S |	S )NrS   r   )r   rM   F)r   r   )r,   r   	ones_likerC   randomsplitr   initr   r   _missing_keyssetr   r	   )rO   r   r   r   rX   r   
params_rngdropout_rngrngsrandom_paramsmissing_keys              r$   init_weightsz*FlaxDistilBertPreTrainedModel.init_weights  s    IIk6	y1"%**"2"23"7
K$=((y.V[(\]ef(-)@AM!(6"23F#11 A&3K&@{#A!$D.011  r&   zbatch_size, sequence_lengthr   trainrw   r   r   c
           
         ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	|t	        j
                  |      }i }
|||
d<   | j                  j                  d|xs | j                  it	        j                  |d      t	        j                  |d      | |||	|
      S )NrM   r   rS   r   )r   )
r4   rw   r   r   r,   r   r   applyr   r-   )rO   rX   r   	head_maskr   r   r   rw   r   r   r   s              r$   r_   z&FlaxDistilBertPreTrainedModel.__call__  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY! ]]95N ")DO{{  v,-IIit,IInD1I  ! 	
 		
r&   r   )NNNNFNNN)ra   rb   rc   rd   r   config_classbase_model_prefixr   r?   Modulere   r,   r   r   intr5   rf   r   rC   r   PRNGKeyr   r   r   DISTILBERT_INPUTS_DOCSTRINGformatr   dictr_   __classcell__)r   s   @r$   r   r     s8   
 $L$"L"))"
 $;;
m 
m 
m 	
m
 yy
m 
m!

 2 2 ! !PZ !fp !( ++F+M+MNk+lm !%*.,0/3&*#

 #
 ZZ''#
 #
 $D>#
 'tn#
 d^#
 n#
r&   r   c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)FlaxDistilBertModuler4   r5   c                     t        | j                  | j                        | _        t	        | j                  | j                        | _        y r   )r3   r4   r5   
embeddingsr   transformerrN   s    r$   rP   zFlaxDistilBertModule.setup  s/    (DJJG1$++TZZPr&   rQ   rw   r   r   c                     ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  ||      }| j                  ||||||      S )NrU   )r^   r   rQ   rw   r   r   )r4   rw   r   r   r  r  )rO   rX   r   rQ   rw   r   r   input_embedss           r$   r_   zFlaxDistilBertModule.__call__  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBYyN&)'/!5#   
 	
r&   NTFFTr   rg   r&   r$   r
  r
    s[    {{E399"Q #"'%* 
 	

  
 #
 
r&   r
  zdThe bare DistilBert Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZeZy)FlaxDistilBertModelN)ra   rb   rc   r
  r   rg   r&   r$   r  r    s	    
 (Lr&   r  c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)FlaxDistilBertForMaskedLMModuler4   r5   c                    t        | j                  | j                        | _        t	        j
                  | j                  j                  | j                  t        j                  j                  j                  | j                  j                              | _        t	        j                  d| j                        | _        | j                  j                  r't        | j                  | j                        | _        y t	        j
                  | j                  j"                  | j                  t        j                  j                  j                  | j                  j                              | _        y )Nr   r7   rm   r:   r;   )r
  r4   r5   r   r?   rr   rB   rC   rD   rE   rF   vocab_transformrK   vocab_layer_normtie_word_embeddingsr   vocab_projectorrA   rN   s    r$   rP   z%FlaxDistilBertForMaskedLMModule.setup   s    .t{{$**M!xxKKOO**++22$++:W:W2X 

 !#U$** M;;**#:jj$D 
 $&88&&jjFF//66dkk>[>[6\$D r&   rQ   rw   r   r   c                     ||n| j                   j                  }| j                  ||||||      }|d   }| j                  |      }	t	        | j                   j
                     |	      }	| j                  |	      }	| j                   j                  r?| j                  j                  d   d   d   d   }
| j                  |	|
j                        }	n| j                  |	      }	|s|	f|dd  z   }|S t        |	|j                  |j                        S )	N)rX   r   rw   r   rQ   r   r   r   r  rG   	embeddingr   logitsr^   r   )r4   use_return_dictr   r  r   r   r  r  	variablesr  Tr   r^   r   )rO   rX   r   rQ   rw   r   r   dlbrt_outputr^   prediction_logitsshared_embeddingr   s               r$   r_   z(FlaxDistilBertForMaskedLMModule.__call__4  s&    &1%<k$++B]B])/!5'# ' 
 %Q 00?"4;;#9#9:;LM 112CD;;**#88B<PQbcdop $ 4 45FHXHZHZ [ $ 4 45F G')L,<<FM!$&44#..
 	
r&   Nr  r   rg   r&   r$   r  r    sZ    {{E399"0 #"'%* &
 	&

  &
 #&
 &
r&   r  z8DistilBert Model with a `language modeling` head on top.c                       e Zd ZeZy)FlaxDistilBertForMaskedLMN)ra   rb   rc   r  r   rg   r&   r$   r%  r%  ]  s    2Lr&   r%  c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)-FlaxDistilBertForSequenceClassificationModuler4   r5   c                    t        | j                  | j                        | _        t	        j
                  | j                  j                  | j                  t        j                  j                  j                  | j                  j                              | _        t	        j                  | j                  j                        | _        t	        j
                  | j                  j                  | j                        | _        y )Nr   r7   rm   r=   r   )r
  r4   r5   r   r?   rr   rB   rC   rD   rE   rF   pre_classifierrL   seq_classif_dropoutrM   
num_labels
classifierrN   s    r$   rP   z3FlaxDistilBertForSequenceClassificationModule.setupi  s    .dkkT hhKKOO**++22$++:W:W2X

 zzt{{'F'FG((KK""**
r&   rQ   rw   r   r   c                 `   ||n| j                   j                  }| j                  ||||||      }|d   }|d d df   }	| j                  |	      }	t	        d   |	      }	| j                  |	|      }	| j                  |	      }
|s	|
f|dd  z   S t        |
|j                  |j                        S )NrQ   rw   r   r   r   relurU   r   r  )
r4   r  r   r)  r   rM   r,  r   r^   r   )rO   rX   r   rQ   rw   r   r   distilbert_outputhidden_statepooled_outputr  s              r$   r_   z6FlaxDistilBertForSequenceClassificationModule.__call__v  s     &1%<k$++B]B] OO'/!5# , 
 )+$QT*++M:v}5]-P/90444++99(33
 	
r&   Nr  r   rg   r&   r$   r'  r'  e  sZ    {{E399"
" #"'%* !
 	!

  !
 #!
 !
r&   r'  z
    DistilBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                       e Zd ZeZy)'FlaxDistilBertForSequenceClassificationN)ra   rb   rc   r'  r   rg   r&   r$   r4  r4    s
     ALr&   r4  c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)%FlaxDistilBertForMultipleChoiceModuler4   r5   c                    t        | j                  | j                        | _        t	        j
                  | j                  j                  | j                  t        j                  j                  j                  | j                  j                              | _        t	        j                  | j                  j                        | _        t	        j
                  d| j                        | _        y )Nr   r7   rm   r=   r   r   )r
  r4   r5   r   r?   rr   rB   rC   rD   rE   rF   r)  rL   r*  rM   r,  rN   s    r$   rP   z+FlaxDistilBertForMultipleChoiceModule.setup  s    .dkkT hhKKOO**++22$++:W:W2X

 zzt{{'F'FG((**
r&   rQ   rw   r   r   c                 .   ||n| j                   j                  }|j                  d   }||j                  d|j                  d         nd }||j                  d|j                  d         nd }| j	                  ||||||      }|d   }	|	d d df   }
| j                  |
      }
t        d   |
      }
| j                  |
|      }
| j                  |
      }|j                  d|      }|s	|f|dd  z   S t        ||j                  |j                        S )	Nr   rz   r.  r   r/  rU   r   r  )r4   r  rT   r{   r   r)  r   rM   r,  r   r^   r   )rO   rX   r   rQ   rw   r   r   num_choicesoutputsr1  r2  r  reshaped_logitss                r$   r_   z.FlaxDistilBertForMultipleChoiceModule.__call__  s7    &1%<k$++B]B]ooa(BKBWI%%b)//"*=>]a	Q_Qk//N4H4H4LMqu //'/!5# " 
 qz$QT*++M:v}5]-P/ ..[9#%33,"!//))
 	
r&   Nr  r   rg   r&   r$   r6  r6    sZ    {{E399"
" #"'%* (
 	(

  (
 #(
 (
r&   r6  z
    DistilBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    c                       e Zd ZeZy)FlaxDistilBertForMultipleChoiceN)ra   rb   rc   r6  r   rg   r&   r$   r=  r=    s	     9Lr&   r=  z(batch_size, num_choices, sequence_lengthc            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)*FlaxDistilBertForTokenClassificationModuler4   r5   c                 "   t        | j                  | j                        | _        t	        j
                  | j                  j                        | _        t	        j                  | j                  j                  | j                        | _	        y )Nr   r=   r   )
r
  r4   r5   r   r?   rL   rM   rr   r+  r,  rN   s    r$   rP   z0FlaxDistilBertForTokenClassificationModule.setup  sR    .dkkTzzt{{':':;((4;;#9#9Lr&   rQ   rw   r   r   c                    ||n| j                   j                  }| j                  ||||||      }|d   }| j                  ||      }| j	                  |      }	|s	|	f|dd  z   S t        |	|j                  |j                        S )Nr.  r   rU   r   r  )r4   r  r   rM   r,  r   r^   r   )
rO   rX   r   rQ   rw   r   r   r:  r^   r  s
             r$   r_   z3FlaxDistilBertForTokenClassificationModule.__call__  s     &1%<k$++B]B]//'/!5# " 
  
]-P/9wqr{**(!//))
 	
r&   Nr  r   rg   r&   r$   r?  r?    s[    {{E399"M #"'%* 
 	

  
 #
 
r&   r?  z
    DistilBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    c                       e Zd ZeZy)$FlaxDistilBertForTokenClassificationN)ra   rb   rc   r?  r   rg   r&   r$   rC  rC  *  s	     >Lr&   rC  c            	       v    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 d
de	de	de	de	fdZ
y	)(FlaxDistilBertForQuestionAnsweringModuler4   r5   c                 X   t        | j                  | j                        | _        t	        j
                  | j                  j                  | j                        | _        | j                  j                  dk(  sJ t	        j                  | j                  j                        | _
        y )Nr   r   r   r=   )r
  r4   r5   r   r?   rr   r+  
qa_outputsrL   
qa_dropoutrM   rN   s    r$   rP   z.FlaxDistilBertForQuestionAnsweringModule.setupA  sj    .dkkT((4;;#9#9L{{%%***zzt{{'='=>r&   rQ   rw   r   r   c                    ||n| j                   j                  }| j                  ||||||      }|d   }| j                  ||      }| j	                  |      }	t        j                  |	| j                   j                  d      \  }
}|
j                  d      }
|j                  d      }|s
|
|f|dd  z   S t        |
||j                  |j                        S )Nr.  r   rU   rz   r   r   )start_logits
end_logitsr^   r   )r4   r  r   rM   rG  r,   r   r+  squeezer   r^   r   )rO   rX   r   rQ   rw   r   r   r0  r^   r  rJ  rK  s               r$   r_   z1FlaxDistilBertForQuestionAnsweringModule.__call__G  s     &1%<k$++B]B] !OO'/!5# , 
 *!,]-P/#&99VT[[5K5KRT#U j#++B/''+
 *-0A!"0EEE/%!+99(33	
 	
r&   Nr  r   rg   r&   r$   rE  rE  =  sZ    {{E399"? #"'%* %
 	%

  %
 #%
 %
r&   rE  z
    DistilBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                       e Zd ZeZy)"FlaxDistilBertForQuestionAnsweringN)ra   rb   rc   rE  r   rg   r&   r$   rN  rN  o  s	     <Lr&   rN  )r%  r=  rN  r4  rC  r  r   )Fr   typingr   r   
flax.linenlinenr?   rC   	jax.numpynumpyr,   r   flax.core.frozen_dictr   r   r   flax.traverse_utilr   r	   r
   modeling_flax_outputsr   r   r   r   r   r   modeling_flax_utilsr   r   r   r   utilsr   r   r   configuration_distilbertr   
get_loggerra   logger_CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCFLAX_DISTILBERT_START_DOCSTRINGr  r%   r1   r  r3   ri   r   r   r   r   r   r   r
  r  r  r%  r'  r4  r6  r=  r  r?  rC  rE  rN  __all__rg   r&   r$   <module>r`     s     %  
   > > ;   w v Y Y 6 
		H	%/ $# . 6
#*RYY *ZP Pfbii :,299 ,^0
bii 0
f
RYY 
4bii "N
$7 N
b
299 
D j#(7 (	( 02Et_ ]>
bii >
B TVuv3 = 3 w3 68KM_ap q2
BII 2
j  $A.K AA + 	9
BII 9
x  $9&C 99 #%@%G%GHr%s #!	(
 (
V  $>+H >> (	/
ryy /
d  $<)F << &$	r&   