
"""PyTorch FNet model."""

import warnings
from dataclasses import dataclass
from functools import partial
from typing import Optional, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import (
    BaseModelOutput,
    BaseModelOutputWithPooling,
    MaskedLMOutput,
    ModelOutput,
    MultipleChoiceModelOutput,
    NextSentencePredictorOutput,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward
from ...utils import auto_docstring, is_scipy_available, logging
from .configuration_fnet import FNetConfig


if is_scipy_available():
    from scipy import linalg


logger = logging.get_logger(__name__)


def _two_dim_matmul(x, matrix_dim_one, matrix_dim_two):
    """Applies 2D matrix multiplication to 3D input arrays."""
    seq_length = x.shape[1]
    matrix_dim_one = matrix_dim_one[:seq_length, :seq_length]
    x = x.type(torch.complex64)
    return torch.einsum("bij,jk,ni->bnk", x, matrix_dim_two, matrix_dim_one)


def two_dim_matmul(x, matrix_dim_one, matrix_dim_two):
    return _two_dim_matmul(x, matrix_dim_one, matrix_dim_two)


def fftn(x):
    """
    Applies n-dimensional Fast Fourier Transform (FFT) to input array.

    Args:
        x: Input n-dimensional array.

    Returns:
        n-dimensional Fourier transform of input n-dimensional array.
    """
    out = x
    # Apply a 1D FFT along every axis except the batch axis (0).
    for axis in reversed(range(x.ndim)[1:]):
        out = torch.fft.fft(out, dim=axis)
    return out


class FNetEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id)
        self.position_embeddings = nn.Embedding(config.max_position_embeddings, config.hidden_size)
        self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size)

        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        # The original FNet code allows embedding and model dimensions to differ, hence this projection.
        self.projection = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
        )
        self.register_buffer(
            "token_type_ids", torch.zeros(self.position_ids.size(), dtype=torch.long), persistent=False
        )

    def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if token_type_ids is None:
            if hasattr(self, "token_type_ids"):
                buffered_token_type_ids = self.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = inputs_embeds + token_type_embeddings
        position_embeddings = self.position_embeddings(position_ids)
        embeddings += position_embeddings
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.projection(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings


class FNetBasicFourierTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self._init_fourier_transform(config)

    def _init_fourier_transform(self, config):
        if not config.use_tpu_fourier_optimizations:
            self.fourier_transform = partial(torch.fft.fftn, dim=(1, 2))
        elif config.max_position_embeddings <= 4096:
            if is_scipy_available():
                self.register_buffer(
                    "dft_mat_hidden", torch.tensor(linalg.dft(config.hidden_size), dtype=torch.complex64)
                )
                self.register_buffer(
                    "dft_mat_seq", torch.tensor(linalg.dft(config.tpu_short_seq_length), dtype=torch.complex64)
                )
                self.fourier_transform = partial(
                    two_dim_matmul, matrix_dim_one=self.dft_mat_seq, matrix_dim_two=self.dft_mat_hidden
                )
            else:
                logger.warning(
                    "SciPy is needed for DFT matrix calculation and is not found. Using TPU optimized fast fourier"
                    " transform instead."
                )
                self.fourier_transform = fftn
        else:
            self.fourier_transform = fftn

    def forward(self, hidden_states):
        # The Fourier transform replaces self-attention: it mixes information across the sequence and hidden
        # dimensions, and only the real part of the result is kept.
        outputs = self.fourier_transform(hidden_states).real
        return (outputs,)


class FNetBasicOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.LayerNorm(input_tensor + hidden_states)
        return hidden_states


class FNetFourierTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = FNetBasicFourierTransform(config)
        self.output = FNetBasicOutput(config)

    def forward(self, hidden_states):
        self_outputs = self.self(hidden_states)
        fourier_output = self.output(self_outputs[0], hidden_states)
        outputs = (fourier_output,)
        return outputs


class FNetIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states


class FNetOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

    def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.LayerNorm(hidden_states + input_tensor)
        return hidden_states


class FNetLayer(GradientCheckpointingLayer):
    def __init__(self, config):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.seq_len_dim = 1  # The dimension which has the sequence length
        self.fourier = FNetFourierTransform(config)
        self.intermediate = FNetIntermediate(config)
        self.output = FNetOutput(config)

    def forward(self, hidden_states):
        self_fourier_outputs = self.fourier(hidden_states)
        fourier_output = self_fourier_outputs[0]

        layer_output = apply_chunking_to_forward(
            self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, fourier_output
        )
        outputs = (layer_output,)
        return outputs

    def feed_forward_chunk(self, fourier_output):
        intermediate_output = self.intermediate(fourier_output)
        layer_output = self.output(intermediate_output, fourier_output)
        return layer_output


class FNetEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList([FNetLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(self, hidden_states, output_hidden_states=False, return_dict=True):
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_outputs = layer_module(hidden_states)
            hidden_states = layer_outputs[0]

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutput(last_hidden_state=hidden_states, hidden_states=all_hidden_states)


class FNetPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # "Pool" the model by taking the hidden state corresponding to the first token.
        first_token_tensor = hidden_states[:, 0]
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output


class FNetPredictionHeadTransform(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        if isinstance(config.hidden_act, str):
            self.transform_act_fn = ACT2FN[config.hidden_act]
        else:
            self.transform_act_fn = config.hidden_act
        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.transform_act_fn(hidden_states)
        hidden_states = self.LayerNorm(hidden_states)
        return hidden_states


class FNetLMPredictionHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.transform = FNetPredictionHeadTransform(config)

        # The output weights are the same as the input embeddings, but there is an output-only bias for each token.
        self.decoder = nn.Linear(config.hidden_size, config.vocab_size)
        self.bias = nn.Parameter(torch.zeros(config.vocab_size))
        self.decoder.bias = self.bias

    def forward(self, hidden_states):
        hidden_states = self.transform(hidden_states)
        hidden_states = self.decoder(hidden_states)
        return hidden_states

    def _tie_weights(self) -> None:
        # Re-tie the decoder bias and the head bias if they get disconnected (e.g. on the meta device).
        if self.decoder.bias.device.type == "meta":
            self.decoder.bias = self.bias
        else:
            self.bias = self.decoder.bias


class FNetOnlyMLMHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.predictions = FNetLMPredictionHead(config)

    def forward(self, sequence_output):
        prediction_scores = self.predictions(sequence_output)
        return prediction_scores


class FNetOnlyNSPHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, pooled_output):
        seq_relationship_score = self.seq_relationship(pooled_output)
        return seq_relationship_score


class FNetPreTrainingHeads(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.predictions = FNetLMPredictionHead(config)
        self.seq_relationship = nn.Linear(config.hidden_size, 2)

    def forward(self, sequence_output, pooled_output):
        prediction_scores = self.predictions(sequence_output)
        seq_relationship_score = self.seq_relationship(pooled_output)
        return prediction_scores, seq_relationship_score


@auto_docstring
class FNetPreTrainedModel(PreTrainedModel):
    config: FNetConfig
    base_model_prefix = "fnet"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)


@dataclass
@auto_docstring(
    custom_intro="""
    Output type of [`FNetForPreTraining`].
    """
)
class FNetForPreTrainingOutput(ModelOutput):
    r"""
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
        Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
        before SoftMax).
    """

    loss: Optional[torch.FloatTensor] = None
    prediction_logits: Optional[torch.FloatTensor] = None
    seq_relationship_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor]] = None


@auto_docstring
class FNetModel(FNetPreTrainedModel):
    """

    The model can behave as an encoder, following the architecture described in [FNet: Mixing Tokens with Fourier
    Transforms](https://huggingface.co/papers/2105.03824) by James Lee-Thorp, Joshua Ainslie, Ilya Eckstein, Santiago Ontanon.

    """

    def __init__(self, config, add_pooling_layer=True):
        r"""
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        """
        super().__init__(config)
        self.config = config

        self.embeddings = FNetEmbeddings(config)
        self.encoder = FNetEncoder(config)
        self.pooler = FNetPooler(config) if add_pooling_layer else None

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, BaseModelOutputWithPooling]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
            batch_size, seq_length = input_shape
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
            batch_size, seq_length = input_shape
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        if (
            self.config.use_tpu_fourier_optimizations
            and seq_length <= 4096
            and self.config.tpu_short_seq_length != seq_length
        ):
            raise ValueError(
                "The `tpu_short_seq_length` in FNetConfig should be set equal to the sequence length being passed to"
                " the model when using TPU optimizations."
            )

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if token_type_ids is None:
            if hasattr(self.embeddings, "token_type_ids"):
                buffered_token_type_ids = self.embeddings.token_type_ids[:, :seq_length]
                buffered_token_type_ids_expanded = buffered_token_type_ids.expand(batch_size, seq_length)
                token_type_ids = buffered_token_type_ids_expanded
            else:
                token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
        )
        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]

        pooler_output = self.pooler(sequence_output) if self.pooler is not None else None

        if not return_dict:
            return (sequence_output, pooler_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooler_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    FNet Model with two heads on top as done during the pretraining: a `masked language modeling` head and a `next
    sentence prediction (classification)` head.
    """
)
class FNetForPreTraining(FNetPreTrainedModel):
    _tied_weights_keys = ["cls.predictions.decoder.bias", "cls.predictions.decoder.weight"]

    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetPreTrainingHeads(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings
        self.cls.predictions.bias = new_embeddings.bias

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        next_sentence_label: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, FNetForPreTrainingOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring) Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForPreTraining.from_pretrained("google/fnet-base")
        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> prediction_logits = outputs.prediction_logits
        >>> seq_relationship_logits = outputs.seq_relationship_logits
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output, pooled_output = outputs[:2]
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

        total_loss = None
        if labels is not None and next_sentence_label is not None:
            loss_fct = CrossEntropyLoss()
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))
            next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
            total_loss = masked_lm_loss + next_sentence_loss

        if not return_dict:
            output = (prediction_scores, seq_relationship_score) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return FNetForPreTrainingOutput(
            loss=total_loss,
            prediction_logits=prediction_scores,
            seq_relationship_logits=seq_relationship_score,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring
class FNetForMaskedLM(FNetPreTrainedModel):
    _tied_weights_keys = ["cls.predictions.decoder.bias", "cls.predictions.decoder.weight"]

    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetOnlyMLMHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    def get_output_embeddings(self):
        return self.cls.predictions.decoder

    def set_output_embeddings(self, new_embeddings):
        self.cls.predictions.decoder = new_embeddings
        self.cls.predictions.bias = new_embeddings.bias

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, MaskedLMOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        prediction_scores = self.cls(sequence_output)

        masked_lm_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()  # -100 index = padding token
            masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), labels.view(-1))

        if not return_dict:
            output = (prediction_scores,) + outputs[2:]
            return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output

        return MaskedLMOutput(loss=masked_lm_loss, logits=prediction_scores, hidden_states=outputs.hidden_states)


@auto_docstring(
    custom_intro="""
    FNet Model with a `next sentence prediction (classification)` head on top.
    """
)
class FNetForNextSentencePrediction(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.fnet = FNetModel(config)
        self.cls = FNetOnlyNSPHead(config)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        **kwargs,
    ) -> Union[tuple, NextSentencePredictorOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
            (see `input_ids` docstring). Indices should be in `[0, 1]`:

            - 0 indicates sequence B is a continuation of sequence A,
            - 1 indicates sequence B is a random sequence.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, FNetForNextSentencePrediction
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/fnet-base")
        >>> model = FNetForNextSentencePrediction.from_pretrained("google/fnet-base")
        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")
        >>> outputs = model(**encoding, labels=torch.LongTensor([1]))
        >>> logits = outputs.logits
        >>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
        ```"""
        if "next_sentence_label" in kwargs:
            warnings.warn(
                "The `next_sentence_label` argument is deprecated and will be removed in a future version, use"
                " `labels` instead.",
                FutureWarning,
            )
            labels = kwargs.pop("next_sentence_label")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        pooled_output = outputs[1]
        seq_relationship_scores = self.cls(pooled_output)

        next_sentence_loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            next_sentence_loss = loss_fct(seq_relationship_scores.view(-1, 2), labels.view(-1))

        if not return_dict:
            output = (seq_relationship_scores,) + outputs[2:]
            return ((next_sentence_loss,) + output) if next_sentence_loss is not None else output

        return NextSentencePredictorOutput(
            loss=next_sentence_loss,
            logits=seq_relationship_scores,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    FNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    """
)
class FNetForSequenceClassification(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.fnet = FNetModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, SequenceClassifierOutput]:
        r"""
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr8  r   
regressionsingle_label_classificationmulti_label_classificationr>   ru   rG  )r]   r  r   rT   rY  problem_typerX  rB   r"   rZ   intr
   squeezer	   r9  r   r   r   )r\   rc   r@   r=   rd   r5  r   r   r   r   rH  r  r;  r   s                 r)   ri   z%FNetForSequenceClassification.forward#  s   " &1%<k$++B]B])))%'!5#  
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'T&PWPePeffr+   rI  )rj   rk   rl   rD   r   r   r"   r   r$  r   r   r   ri   rn   ro   s   @r)   rU  rU    s    	  -115/304)-/3&*9gELL)9g !.9g u||,	9g
  -9g &9g 'tn9g d^9g 
u..	/9g 9gr+   rU  c                        e Zd Z fdZe	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     dee   dee   d	e	e
ef   fd
       Z xZS )FNetForMultipleChoicec                     t         |   |       t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                  d      | _
        | j                          y r   )rC   rD   r  r   r   rR   rS   rT   rP   rG   rY  r  r[   s     r)   rD   zFNetForMultipleChoice.__init__b  sV     f%	zz&"<"<=))F$6$6: 	r+   rc   r@   r=   rd   r5  r   r   r   c                    ||n| j                   j                  }||j                  d   n|j                  d   }|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  ||||||      }	|	d   }
| j                  |
      }
| j                  |
      }|j                  d|      }d}|t               } |||      }|s|f|	dd z   }||f|z   S |S t        |||	j                        S )a[  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]

        # Flatten the choice dimension so each choice is encoded as an independent sequence.
        input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
        token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
        position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None
        inputs_embeds = (
            inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
            if inputs_embeds is not None
            else None
        )

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        reshaped_logits = logits.view(-1, num_choices)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(reshaped_logits, labels)

        if not return_dict:
            output = (reshaped_logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return MultipleChoiceModelOutput(loss=loss, logits=reshaped_logits, hidden_states=outputs.hidden_states)


@auto_docstring
class FNetForTokenClassification(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.fnet = FNetModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, TokenClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)


@auto_docstring
class FNetForQuestionAnswering(FNetPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.fnet = FNetModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        start_positions: Optional[torch.Tensor] = None,
        end_positions: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, QuestionAnsweringModelOutput]:
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.fnet(
            input_ids,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]

        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split adds a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # Sometimes the start/end positions are outside our model inputs; we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
        )


__all__ = [
    "FNetForMaskedLM",
    "FNetForMultipleChoice",
    "FNetForNextSentencePrediction",
    "FNetForPreTraining",
    "FNetForQuestionAnswering",
    "FNetForSequenceClassification",
    "FNetForTokenClassification",
    "FNetLayer",
    "FNetModel",
    "FNetPreTrainedModel",
]