
    rh                     (   d dl mZmZ d dlZd dlmZ d dlZd dlm	Z
 d dl	Zd dlmZmZmZ d dlmZ d dlmZmZ ddlmZmZmZmZ ddlmZmZmZmZ dd	lm Z m!Z! d
dl"m#Z# ejH                  jJ                   G d de             Z&dZ'dZ(de)e*e*f   de
jV                  fdZ,e
jZ                  fdZ. G d dej^                        Z0 G d dej^                        Z1 G d dej^                        Z2 G d dej^                        Z3 G d dej^                        Z4 G d dej^                        Z5 G d  d!ej^                        Z6 G d" d#ej^                        Z7 G d$ d%ej^                        Z8 G d& d'ej^                        Z9 G d( d)ej^                        Z: G d* d+ej^                        Z; G d, d-e      Z< G d. d/ej^                        Z= G d0 d1ej^                        Z> e d2e'       G d3 d4e<             Z?d5Z@ ee?e@        ee?e&e#6        G d7 d8ej^                        ZA e d9e'       G d: d;e<             ZBd<ZC eeBeC        eeBee#6        G d= d>ej^                        ZD e d?e'       G d@ dAe<             ZEdBZF eeEeF        eeEee#6       g dCZGy)D    )CallableOptionalN)
FrozenDictfreezeunfreeze)dot_product_attention_weights)flatten_dictunflatten_dict   )FlaxBaseModelOutputFlaxBaseModelOutputWithPoolingFlaxMaskedLMOutputFlaxSequenceClassifierOutput)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forward   )
BeitConfigc                       e Zd ZdZy)FlaxBeitModelOutputWithPoolinga  
    Class for outputs of [`FlaxBeitModel`].

    Args:
        last_hidden_state (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`jnp.ndarray` of shape `(batch_size, hidden_size)`):
            Average of the last layer hidden states of the patch tokens (excluding the *[CLS]* token) if
            *config.use_mean_pooling* is set to True. If set to False, then the final hidden state of the *[CLS]* token
            will be returned.
        hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `jnp.ndarray` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus
            the initial embedding outputs.
        attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    N)__name__
__module____qualname____doc__     ~/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/beit/modeling_flax_beit.pyr   r   ,   s    r   r   a  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`BeitConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
a  
    Args:
        pixel_values (`numpy.ndarray` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`AutoImageProcessor.__call__`] for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
window_sizereturnc                    d| d   z  dz
  d| d   z  dz
  z  dz   }t        j                  | d         }t        j                  | d         }t        j                  t        j                  ||d            }t        j                  |d      }|dddddf   |dddddf   z
  }t        j
                  |d	      }|dddddfxx   | d   dz
  z  cc<   |dddddfxx   | d   dz
  z  cc<   |dddddfxx   d| d   z  dz
  z  cc<   t        j                  | d   | d   z  dz   fdz  |j                  
      }|j                  d      |ddddf<   |dz
  |dddf<   |dz
  |dddf<   |dz
  |d<   t        j                  |      S )zP
    get pair-wise relative position index for each token inside the window
       r   r   r   ij)indexing)r$   N)r   r$   r   shapedtyper'   )r   r   )nparangestackmeshgridreshape	transposezerosr*   sumjnparray)r!   num_relative_distancecoords_hcoords_wcoordscoords_flattenrelative_coordsrelative_position_indexs           r    relative_position_index_initr<   w   s    Q/!3KN8JQ8NORSSyyQ(HyyQ(HXXbkk(HtDEFZZ0N$Q4Z0>!T1*3MMOll?I>OAq!GA 22Aq!GA 22Aq!GKN 2Q 66 hhk!n{1~.MPQ.Q-SVW-W_n_t_tu&5&9&9"&=ABF#%:Q%>AqrE"%:Q%>ABE"$9A$=D!99,--r   c                 4    t        j                  ||      |z  S N)r3   ones)keyr)   scaler*   s       r    ones_with_scalerB      s    88E5!E))r   c                   b    e Zd ZU dZeed<   ej                  j                  dde	e
   fd       Zy)FlaxBeitDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).ratedeterministicc                 N   | j                   dk(  r|S d| j                   z
  }|r|S |j                  d   fd|j                  dz
  z  z   }| j                  d      }|t        j
                  j                  |||j                        z   }t        j                  |      }||z  |z  }|S )N        g      ?r   )r   r   droppathr(   )
rE   r)   ndimmake_rngjaxrandomuniformr*   r3   floor)	selfinputsrF   	keep_probr)   rngrandom_tensorbinary_tensoroutputs	            r    __call__zFlaxBeitDropPath.__call__   s    99M$))O	M\\!_&q)AAE--
+C%

(:(:3eSYS_S_(:(``MIIm4Mi'-7FMr   NT)r   r   r   r   float__annotations__nnmodulecompactr   boolrW   r   r   r    rD   rD      s1    b
KYYhtn  r   rD   c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitPatchEmbeddingsconfigr*   c           
         | j                   j                  | _        | j                   j                  }| j                   j                  }||z  ||z  z  }||z  ||z  f}|| _        || _        t        j                  | j                   j                  ||f||fd| j                  t        j                  j                  j                  | j                   j                              | _        y )NVALID)kernel_sizestridespaddingr*   kernel_init)ra   num_channels
image_size
patch_sizenum_patchespatch_shaper[   Convhidden_sizer*   rL   initializersnormalinitializer_range
projection)rP   ri   rj   rk   rl   s        r    setupzFlaxBeitPatchEmbeddings.setup   s     KK44[[++
[[++
!Z/J*4LM!Z/z1IJ&&''KK###Z0,**++224;;3P3PQ
r   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }|j                   \  }}}}t	        j
                  ||d|f      S )Nr'   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r)   rh   
ValueErrorrr   r3   r/   )rP   pixel_valuesrh   
embeddings
batch_size_channelss          r    rW   z FlaxBeitPatchEmbeddings.__call__   sl    #))"-4,,,w  __\2
%/%5%5"
Aq({{:
B'ABBr   N
r   r   r   r   rZ   r3   float32r*   rs   rW   r   r   r    r`   r`      s%    {{E399"
"Cr   r`   c                   `    e Zd ZU dZeed<   ej                  Zej                  ed<   d Z	ddZ
y)FlaxBeitEmbeddingsz7Construct the CLS token, position and patch embeddings.ra   r*   c                    | j                  dt        j                  j                  dd| j                  j
                  f      | _        | j                  j                  rG| j                  dt        j                  j                  dd| j                  j
                  f      | _        t        | j                  | j                        | _        | j                  j                  }| j                  j                  rJ| j                  dt        j                  j                  d|dz   | j                  j
                  f      | _        t        j                  | j                  j                         | _        y )N	cls_tokenr   
mask_tokenr*   position_embeddingsrE   )paramr[   ro   r1   ra   rn   r   use_mask_tokenr   r`   r*   patch_embeddingsrk    use_absolute_position_embeddingsr   Dropouthidden_dropout_probdropout)rP   rk   s     r    rs   zFlaxBeitEmbeddings.setup   s    K1F1FAt{{OfOfHgh;;%%"jjr7L7LqRSUYU`U`UlUlNmnDO 74:: V++77;;77'+zz%r'<'<q+PQ/SWS^S^SjSj>k(D$ zzt{{'F'FGr   Nc                    | j                  |      }|j                  \  }}}t        j                  | j                  |d| j
                  j                  f      }|j                  |j                        }|wt        j                  | j                  ||| j
                  j                  f      }	|	j                  |j                        }	t        j                  |d      }
|d|
z
  z  |	|
z  z   }t        j                  ||fd      }| j
                  j                  r(|| j                  j                  |j                        z   }| j                  ||      }|S )Nr   r'   axisrF   )r   r)   r3   broadcast_tor   ra   rn   astyper*   r   expand_dimsconcatenater   r   r   )rP   rv   bool_masked_posrF   rw   rx   seq_lenry   
cls_tokensmask_tokensws              r    rW   zFlaxBeitEmbeddings.__call__   s)   **<8
!+!1!1
GQ%%dnnz1dkkF]F]6^_
&&z'7'78
&**4??ZRVR]R]RiRi<jkK%,,Z-=-=>Kb9A#q1u-a?J__j*%=AF
;;77#d&>&>&E&EjFVFV&WWJ\\*M\J
r   )NT)r   r   r   r   r   rZ   r3   r|   r*   rs   rW   r   r   r    r~   r~      s(    A{{E399"
Hr   r~   c                   n    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
d Zy)FlaxBeitRelativePositionBiasra   r!   r*   c                     d| j                   d   z  dz
  d| j                   d   z  dz
  z  dz   }| j                  dt        j                  j                  || j
                  j                  f      | _        t        | j                         | _	        y )Nr$   r   r   r   relative_position_bias_table)
r!   r   r[   ro   r1   ra   num_attention_headsr   r<   r;   )rP   r5   s     r    rs   z"FlaxBeitRelativePositionBias.setup   s    !"T%5%5a%8!81!<TEUEUVWEXAX[\A\ ]`a a,0JJ*OO!!"DKK$C$CD-
) (DDDTDT'U$r   c                 *   | j                   j                  d      }| j                  d   | j                  d   z  dz   | j                  d   | j                  d   z  dz   df}| j                  |   j                  |      }t	        j
                  |d      S )Nr'   r   r   )r$   r   r   )r;   r/   r!   r   r3   r0   )rP   indexr)   relative_position_biass       r    rW   z%FlaxBeitRelativePositionBias.__call__  s    ,,44R8!!!$t'7'7'::Q>@P@PQR@SVZVfVfghVi@ilm@moqr!%!B!B5!I!Q!QRW!X}}3Y??r   N)r   r   r   r   rZ   tupleintr3   r|   r*   rs   rW   r   r   r    r   r      s4    sCx {{E399"	V@r   r   c                   |    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 d	dedefdZy)
FlaxBeitSelfAttentionra   r!   r*   c                 T   | j                   j                  | j                   j                  z  dk7  rPt        | j                   d      s:t	        d| j                   j                   d| j                   j                   d      t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                              | _        t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                        d      | _        t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                              | _        | j                  r2t!        | j                   | j                  | j                  	      | _        y d | _        y )
Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .)r*   rg   F)r*   rg   use_biasr!   r*   )ra   rn   r   hasattrru   r[   Denser*   rL   ro   rp   rq   queryr@   valuer!   r   r   rP   s    r    rs   zFlaxBeitSelfAttention.setup  s   ;;""T[[%D%DDIRYKK)S
 "4;;#:#:"; <889< 
 XXKK##**++224;;3P3PQ


 88KK##**++224;;3P3PQ	
 XXKK##**++224;;3P3PQ

  )$BRBRZ^ZdZde 	#  	#r   NrF   output_attentionsc                 b   | j                   j                  | j                   j                  z  }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }d }	|s*| j                   j                  dkD  r| j                  d      }	t        j                  d| j                        }
| j                  ?t        j                  | j                         d      }
|
j                  |j                        }
||
|j                  |
j                        z   }
t!        |||
|	| j                   j                  d|| j                  d 	      }t        j"                  d||      }|j	                  |j
                  d d d	z         }|r||f}|S |f}|S )
Nr$   rH   r   r   r   T)biasdropout_rngdropout_ratebroadcast_dropoutrF   r*   	precisionz...hqk,...khd->...qhd)r'   )ra   rn   r   r   r/   r)   r   r@   attention_probs_dropout_probrK   r3   r4   r*   r   r   r   r   einsum)rP   hidden_statesr   rF   r   head_dimquery_statesvalue_states
key_statesr   attention_biasattn_weightsattn_outputoutputss                 r    rW   zFlaxBeitSelfAttention.__call__-  s    ;;**dkk.M.MMzz-088#t{{'F'F&QQ
 zz-088#t{{'F'F&QQ
 XXm,44#t{{'F'F&QQ

 !I!IC!O--	2K3djj9&&2 __T-H-H-JANN+22<3E3EFN "-+.D.K.KNL`L`.aaN4#AA"'**

 jj!8,U!))+*;*;BQ*?%*GH1B;- JUr   NTFr   r   r   r   rZ   r   r   r3   r|   r*   rs   r^   rW   r   r   r    r   r     sJ    sCx {{E399"
B qv-IM-im-r   r   c                   b    e Zd ZU eed<   ej                  Zej                  ed<   d Zdde	fdZ
y)FlaxBeitSelfOutputra   r*   c                 N   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        j                  | j                  j                        | _        y Nrg   r*   r   r[   r   ra   rn   rL   ro   rp   rq   r*   denser   r   r   r   s    r    rs   zFlaxBeitSelfOutput.setupa  d    XXKK##++224;;3P3PQ**


 zzt{{'F'FGr   rF   c                 N    | j                  |      }| j                  ||      }|S Nr   r   r   rP   r   rF   s      r    rW   zFlaxBeitSelfOutput.__call__i  s(    

=1]-Pr   NrX   r   r   r   r   rZ   r3   r|   r*   rs   r^   rW   r   r   r    r   r   ]  s,    {{E399"HT r   r   c                   x    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 ddefdZy)	FlaxBeitAttentionra   r!   r*   c                     t        | j                  | j                  | j                        | _        t        | j                  | j                        | _        y )Nr   )r   ra   r!   r*   	attentionr   rV   r   s    r    rs   zFlaxBeitAttention.setupt  s9    .t{{D<L<LTXT^T^_(DJJGr   Nr   c                 |    | j                  ||||      }|d   }| j                  ||      }|f}|r	||d   fz  }|S NrF   r   r   r   r   )r   rV   )rP   r   r   rF   r   attn_outputsr   r   s           r    rW   zFlaxBeitAttention.__call__x  s_     ~~1bs & 
 #1okk+]kK.Q))Gr   r   r   r   r   r    r   r   o  sB    sCx {{E399"H
 inaer   r   c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitIntermediatera   r*   c                 4   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        | j                  j                     | _        y )Nr   )r[   r   ra   intermediate_sizerL   ro   rp   rq   r*   r   r   
hidden_act
activationr   s    r    rs   zFlaxBeitIntermediate.setup  s`    XXKK))++224;;3P3PQ**


 !!7!78r   c                 J    | j                  |      }| j                  |      }|S r>   )r   r   )rP   r   s     r    rW   zFlaxBeitIntermediate.__call__  s$    

=16r   Nr{   r   r   r    r   r     s$    {{E399"9r   r   c                   b    e Zd ZU eed<   ej                  Zej                  ed<   d Zdde	fdZ
y)FlaxBeitOutputra   r*   c                 N   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        j                  | j                  j                        | _        y r   r   r   s    r    rs   zFlaxBeitOutput.setup  r   r   rF   c                 N    | j                  |      }| j                  ||      }|S r   r   r   s      r    rW   zFlaxBeitOutput.__call__  s(    

=1]-Pr   NrX   r   r   r   r    r   r     s,    {{E399"HT r   r   c                       e Zd ZU eed<   eeef   ed<   eed<   ej                  Z
ej                  ed<   d Z	 d
dedefd	Zy)FlaxBeitLayerra   r!   drop_path_rater*   c                 z   t        | j                  | j                  | j                        | _        t        | j                  | j                        | _        t        | j                  | j                        | _        t        j                  | j                  j                  | j                        | _        t        | j                        | _        t        j                  | j                  j                  | j                        | _        | j                  j"                  | _        | j$                  dkD  rw| j'                  dt(        | j                  j*                  | j$                        | _        | j'                  dt(        | j                  j*                  | j$                        | _        y d | _        d | _        y )Nr   epsilonr*   r   r   lambda_1lambda_2)r   ra   r!   r*   r   r   intermediater   rV   r[   	LayerNormlayer_norm_epslayernorm_beforerD   r   	drop_pathlayernorm_afterlayer_scale_init_valueinit_valuesr   rB   rn   r   r   r   s    r    rs   zFlaxBeitLayer.setup  s&   *4;;8H8HPTPZPZ[0DJJO$T[[

C "T[[5O5OW[WaWa b)t/B/BC!||DKK4N4NVZV`V`a;;==a JJz?T[[E\E\_c_o_opDM JJz?T[[E\E\_c_o_opDM DM DMr   NrF   r   c                    | j                  | j                  |      |||      }|d   }| j                  (| j                  j                  |j                        |z  }| j                  ||      |z   }| j                  |      }| j                  |      }| j                  ||      }| j                  (| j                  j                  |j                        |z  }| j                  ||      |z   }|f}|r	||d   fz  }|S r   )
r   r   r   r   r*   r   r   r   rV   r   )	rP   r   r   rF   r   self_attention_outputsattention_outputlayer_outputr   s	            r    rW   zFlaxBeitLayer.__call__  s     "&!!-0"'/	 "0 "
 2!4 ==$#}}334D4J4JKN^^ '7}UXee ++M:((6{{<}{M ==$==//0B0BClRL ~~l-~PS``/.q133Gr   r   )r   r   r   r   rZ   r   r   rY   r3   r|   r*   rs   r^   rW   r   r   r    r   r     sO    sCx {{E399"!" qv$IM$im$r   r   c            	           e Zd ZU eed<   eeef   ed<   ee   ed<   e	g e
j                  f   ed<   e
j                  Ze
j                  ed<   d Z	 	 	 	 ddeded	ed
efdZy)FlaxBeitLayerCollectionra   r!   drop_path_ratesr   r*   c                 &   t        | j                  j                        D cg c]^  }t        | j                  | j                  j                  r| j
                  nd | j                  |   t        |      | j                        ` c}| _	        y c c}w )N)r!   r   namer*   )
rangera   num_hidden_layersr   use_relative_position_biasr!   r   strr*   layers)rP   is     r    rs   zFlaxBeitLayerCollection.setup  st     4;;889	
  040V0VD,,\`#33A6Vjj	
 	
s   A#BrF   r   output_hidden_statesreturn_dictc                 4   |rdnd }|rdnd }t        | j                        D ]H  \  }}	|r||fz  }| j                  | j                         nd }
 |	||
||      }|d   }|s@||d   fz  }J |r||fz  }|f}|st        d |D              S t	        |||      S )Nr   r   r   r   c              3   &   K   | ]	  }||  y wr>   r   ).0vs     r    	<genexpr>z3FlaxBeitLayerCollection.__call__.<locals>.<genexpr>  s     =qq}=s   )last_hidden_stater   
attentions)	enumerater   r   r   r   )rP   r   rF   r   r  r  all_attentionsall_hidden_statesr  layerr   layer_outputsr   s                r    rW   z FlaxBeitLayerCollection.__call__  s      1d"6BD!$++. 	6HAu#!m%55!FJFaFaFmT%@%@%Bsw"!5]fwM *!,M =#3"55	6  -!11 "=G==="+;LYg
 	
r   NTFFT)r   r   r   r   rZ   r   r   listrY   r   r3   ndarrayr|   r*   rs   r^   rW   r   r   r    r   r     s    sCx %[ $R_55{{E399"

 #"'%* !
 !
  	!

 #!
 !
r   r   c            	           e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 	 	 	 ddedededefd	Zy
)FlaxBeitEncoderra   r!   r*   c                    | j                   j                  r1t        | j                   | j                  | j                        | _        t        t        j                  d| j                   j                  | j                   j                              }t        | j                   | j                  || j                   j                  r| j
                  nd | j                        | _        y )N)ra   r!   r*   r   )r!   r   r   r*   )ra   !use_shared_relative_position_biasr   r!   r*   r   r  r+   linspacer   r   r   r  )rP   r   s     r    rs   zFlaxBeitEncoder.setup(  s    ;;88*F{{0@0@

+D'
 r{{1dkk.H.H$++JgJghi,KK((+{{<< $(#>#>**

r   rF   r   r  r  c                 .    | j                  |||||      S )NrF   r   r  r  )r  )rP   r   rF   r   r  r  s         r    rW   zFlaxBeitEncoder.__call__:  s)     zz'/!5#  
 	
r   Nr  r   r   r   r    r  r  #  sh    sCx {{E399"
* #"'%* 
 
  	

 #
 
r   r  c                   |    e Zd ZU dZeZdZdZdZe	j                  ed<   ddej                  dfded	ed
ej                  def fdZddej&                  j(                  dededefdZ eej5                  d            	 	 	 	 	 	 	 ddee   dej&                  j(                  dedee   dee   dee   fd       Z xZS )FlaxBeitPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    beitrv   Nmodule_classr   Tra   seedr*   _do_initc                      | j                   d||d|}|$d|j                  |j                  |j                  f}t        |   ||||||       y )N)ra   r*   r   )input_shaper  r*   r  r   )r  ri   rh   super__init__)	rP   ra   r!  r  r*   r  kwargsr\   	__class__s	           r    r#  z FlaxBeitPreTrainedModel.__init__V  sc     #""H&HHf//1B1BFDWDWXK[tSXcklr   rS   r!  paramsr"   c                    t        j                  || j                        }t        j                  j                  |      \  }}t        j                  j                  |      \  }}|||d}| j                  j                  ||d      d   }	|dt        t        |	            }	t        t        |            }| j                  D ]
  }
|	|
   ||
<    t               | _
        t        t        |            S |	S )Nr   )r&  r   rI   F)r  r&  )r3   r1   r*   rL   rM   splitr\   initr	   r   _missing_keyssetr   r
   )rP   rS   r!  r&  rv   
params_rngr   droppath_rngrngsrandom_paramsmissing_keys              r    init_weightsz$FlaxBeitPreTrainedModel.init_weightsd  s    yyDJJ?"%**"2"23"7
K$'JJ$4$4[$A!\$,W((|(OPXY(-)@AM!(6"23F#11 A&3K&@{#A!$D.011  r   zbatch_size, sequence_lengthr   trainr   r  r  c	           
         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }t	        j
                  |d      }i }	|,t        j                  j                  |      \  }}
||	d<   |
|	d<   | j                  j                  d|xs | j                  it	        j                  |t        j                        || ||||	      S )N)r   r$   r   r   r   rI   r&  r   )r.  )ra   r   r  r  r3   r0   rL   rM   r(  r\   applyr&  r4   r|   )rP   rv   r   r&  r   r2  r   r  r  r.  r-  s              r    rW   z FlaxBeitPreTrainedModel.__call__x  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY}}\<@"(+

(8(8(E%K)DO+D{{  v,-IIl#++6I  ! 	
 		
r   r>   )NNNFNNN)r   r   r   r   r   config_classbase_model_prefixmain_input_namer  r[   ModulerZ   r3   r|   r   r*   r^   r#  rL   rM   PRNGKeyr   r   r1  r   BEIT_INPUTS_DOCSTRINGformatr   dictrW   __classcell__)r%  s   @r    r  r  K  s/   
 L$O"L"))"
 ;;mm 	m
 yym m!

 2 2 ! !PZ !fp !( ++@+G+GHe+fg !%*.,0/3&*"
 	"

 ZZ''"
 "
 $D>"
 'tn"
 d^"
 h"
r   r  c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitPoolerra   r*   c                     | j                   j                  r;t        j                  | j                   j                  | j
                        | _        y y )Nr   )ra   use_mean_poolingr[   r   r   r*   	layernormr   s    r    rs   zFlaxBeitPooler.setup  s7    ;;''\\$++2L2LTXT^T^_DN (r   c                     | j                   j                  r6|d d dd d d f   }| j                  t        j                  |d            }|S |d d df   }|S )Nr   r   r   )ra   rA  rB  r3   mean)rP   r   patch_tokenspooled_outputs       r    rW   zFlaxBeitPooler.__call__  sX    ;;''(AB2L NN388Lq+IJM
  *!Q$/Mr   Nr{   r   r   r    r?  r?    s%    {{E399"`	r   r?  c            	           e Zd ZU eed<   ej                  Zej                  ed<   dZe	ed<   d Z
	 	 	 	 	 dde	de	d	e	d
e	fdZy)FlaxBeitModulera   r*   Tadd_pooling_layerc                    t        | j                  | j                        | _        t	        | j                  | j                  j
                  j                  | j                        | _        | j                  j                  s:t        j                  | j                  j                  | j                        | _        | j                  r't        | j                  | j                        | _        y d | _        y )Nr   r   r   )r~   ra   r*   rw   r  r   rl   encoderrA  r[   r   r   rB  rI  r?  poolerr   s    r    rs   zFlaxBeitModule.setup  s    ,T[[

K&KKT__%E%E%Q%QY]YcYc
 {{++\\$++2L2LTXT^T^_DNGKG]G]nT[[

Ccgr   NrF   r   r  r  c                 `   | j                  |||      }| j                  |||||      }|d   }| j                  j                  s| j	                  |      }| j
                  r| j                  |      nd }	|s|		|f|dd  z   S ||	f|dd  z   S t        ||	|j                  |j                        S )Nr   r  r   r   )r	  pooler_outputr   r
  )
rw   rK  ra   rA  rB  rI  rL  r   r   r
  )
rP   rv   r   rF   r   r  r  r   r   pooleds
             r    rW   zFlaxBeitModule.__call__  s     oUbc,,'/!5#  
  
{{++ NN=9M/3/E/E]+4~%''!"+55!6*WQR[88-+ !//))	
 	
r   )NTFFT)r   r   r   r   rZ   r3   r|   r*   rI  r^   rs   rW   r   r   r    rH  rH    si    {{E399""t"h ""'%* "
 	"

  "
 #"
 "
r   rH  z^The bare Beit Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZeZy)FlaxBeitModelN)r   r   r   rH  r  r   r   r    rQ  rQ    s	    
 "Lr   rQ  a  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")
    >>> model = FlaxBeitModel.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer5  c                   n    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 	 	 dde	fdZ
y)$FlaxBeitForMaskedImageModelingModulera   r*   c                    t        | j                  d| j                        | _        t	        j
                  | j                  j                  | j                        | _        t	        j                  | j                  j                  t        j                  j                  j                  | j                  j                        | j                        | _        y )NF)rI  r*   r   r   )rH  ra   r*   r  r[   r   r   rB  r   
vocab_sizerL   ro   rp   rq   lm_headr   s    r    rs   z*FlaxBeitForMaskedImageModelingModule.setup  s    "4;;%tzzZ	 dkk.H.HPTPZPZ[xxKK""++224;;3P3PQ**
r   NrF   c                 "   ||n| j                   j                  }| j                  ||||||      }|d   }| j                  |      }| j	                  |d d dd f         }	|s|	f|dd  z   }
|
S t        |	|j                  |j                        S )Nr  r   r   r$   logitsr   r
  )ra   use_return_dictr  rB  rW  r   r   r
  )rP   rv   r   rF   r   r  r  r   sequence_outputprediction_scoresrV   s              r    rW   z-FlaxBeitForMaskedImageModelingModule.__call__  s     &1%<k$++B]B]))'/!5#  
 "!*..9 LLAB)?@')GABK7FM!$!//))
 	
r   NNTNNNr   r   r   r    rT  rT    sB    {{E399"	
 "! 
 	 
r   rT  zYBeit Model transformer with a 'language' modeling head on top (to predict visual tokens).c                       e Zd ZeZy)FlaxBeitForMaskedImageModelingN)r   r   r   rT  r  r   r   r    r`  r`  9  s	    
 8Lr   r`  a?  
    bool_masked_pos (`numpy.ndarray` of shape `(batch_size, num_patches)`):
        Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, BeitForMaskedImageModeling
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
    >>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
c                   n    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 	 	 dde	fdZ
y)$FlaxBeitForImageClassificationModulera   r*   c                 >   t        | j                  | j                  d      | _        t	        j
                  | j                  j                  t        j                  j                  j                  | j                  j                        | j                        | _        y )NT)ra   r*   rI  r   )rH  ra   r*   r  r[   r   
num_labelsrL   ro   rp   rq   
classifierr   s    r    rs   z*FlaxBeitForImageClassificationModule.setupd  sa    "$++TZZ[_`	((KK""++224;;3P3PQ**
r   NrF   c                     ||n| j                   j                  }| j                  |||||      }|d   }| j                  |      }	|s|	f|dd  z   }
|
S t	        |	|j
                  |j                        S )Nr  r   r$   rY  )ra   r[  r  re  r   r   r
  )rP   rv   r   rF   r   r  r  r   rF  rZ  rV   s              r    rW   z-FlaxBeitForImageClassificationModule.__call__l  s     &1%<k$++B]B]))'/!5#  
  
/Y,FM+!//))
 	
r   r^  r   r   r   r    rb  rb  `  sB    {{E399"
 "!
 	
r   rb  z
    Beit Model transformer with an image classification head on top (a linear layer on top of the average of the final
    hidden states of the patch tokens) e.g. for ImageNet.
    c                       e Zd ZeZy)FlaxBeitForImageClassificationN)r   r   r   rb  r  r   r   r    rh  rh    s	     8Lr   rh  aM  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitForImageClassification
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
    >>> model = FlaxBeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = logits.argmax(-1).item()
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
    ```
)rh  r`  rQ  r  )Htypingr   r   flax
flax.linenlinenr[   rL   	jax.numpynumpyr3   r+   flax.core.frozen_dictr   r   r   flax.linen.attentionr   flax.traverse_utilr	   r
   modeling_flax_outputsr   r   r   r   modeling_flax_utilsr   r   r   r   utilsr   r   configuration_beitr   struct	dataclassr   BEIT_START_DOCSTRINGr:  r   r   r  r<   r|   rB   r8  rD   r`   r~   r   r   r   r   r   r   r   r   r  r  r?  rH  rQ  FLAX_BEIT_MODEL_DOCSTRINGrT  r`  FLAX_BEIT_MLM_DOCSTRINGrb  rh  FLAX_BEIT_CLASSIF_DOCSTRING__all__r   r   r    <module>r}     s  " &   
   > > > ;   Q * %C  ,! F ".eCHo .#++ .0 .1[[ *ryy *Cbii C@& &R@299 @.RBII Rj $		 4299 &RYY &:BII :z4
bii 4
n%
bii %
PP
1 P
fRYY (0
RYY 0
f d"+ "	" , (A B  <Zis t/
299 /
d _8%< 8	8 2 79P Q  "0BQ[
*
299 *
Z  8%< 88 2 79T U  "0L[e
r   