
    rh                     r   d Z ddlZddlmZ ddlmZmZ ddlZddl	Z	ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZ  ej@                  e!      Z"dZ#d Z$ G d de
jJ                        Z& G d de
jJ                        Z'de	jP                  de)de)de	jP                  fdZ* G d de
jJ                        Z+ G d de
jJ                        Z, G d de
jJ                        Z- G d d e
jJ                        Z.	 dGd!e	jP                  d"e)d#e)d$e/d%e/de	jP                  fd&Z0 G d' d(e
jJ                        Z1 G d) d*e
jJ                        Z2e G d+ d,e             Z3 G d- d.e
jJ                        Z4e ed/0       G d1 d2e                    Z5 ed30       G d4 d5e3             Z6e G d6 d7e3             Z7 ed80       G d9 d:e3             Z8e G d; d<e3             Z9 ed=0       G d> d?e3             Z:e G d@ dAe3             Z;e G dB dCe3             Z<e G dD dEe3             Z=g dFZ>y)Hz!PyTorch Funnel Transformer model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                 |   	 ddl }ddl}ddl}t        j                  j                  |      }t        j                  d|        |j                  j                  |      }g }g }	|D ]^  \  }
}t        j                  d|
 d|        |j                  j                  ||
      }|j                  |
       |	j                  |       ` ddd	d
ddddddddddd}t        ||	      D ]  \  }
}|
j                  d      }
t!        d |
D              r(t        j                  ddj#                  |
              R|
d   dk(  r[| }d}|
dd D ]  }t%        |t&              s|j)                  d|      rt+        |j-                  d|      j/                         d         }||j0                  k  rQd}||j2                  |   k\  r*||j2                  |   z  }|dz  }||j2                  |   k\  r*|j4                  |   |   }||j0                  z  }|j6                  |   }|dk(  rt%        |t8              r|j:                  } n%||v rt=        |||         }		 t=        ||      } |rtE        |jB                        tE        |jB                        k7  r|jG                  |jB                        }dk(  r |jH                  |      }tK        jL                  |      |_'         | S # t        $ r t        j                  d        w xY w# t>        $ r. tA        ddj#                  |
       |jB                         d}Y  w xY w)z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape k_headq_headv_head	post_projlinear_1linear_2	attentionffnweightbiasword_embeddings
embeddings)kqvolayer_1layer_2rel_attnffkernelgammabetalookup_tableword_embeddinginput/c              3   $   K   | ]  }|d v  
 yw))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     }/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/funnel/modeling_funnel.py	<genexpr>z,load_tf_weights_in_funnel.<locals>.<genexpr>\   s      
 nn
s   z	Skipping 	generatorFr   z	layer_\d+zlayer_(\d+)rTr-   )(renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzipsplitanyjoin
isinstanceFunnelPositionwiseFFN	fullmatchintsearchgroupsnum_hidden_layersblock_sizesblockslayersFunnelRelMultiheadAttentionr_kernelgetattrAttributeErrorprintshapelenreshape	transposetorch
from_numpydata)modelconfigtf_checkpoint_pathrA   nptftf_path	init_varsnamesarraysnamerb   array
_layer_mappointerskippedm_namelayer_index	block_idxs                      r=   load_tf_weights_in_funnelrz   .   sI   
 ggoo01G
KK8	BC''0IEF  e(l5'BC&&w5Te	  +J" 5&) +3ezz#  

 
 KK)CHHTN#3457k!12h 	Fg'<=",,|]cBd!"))NF"C"J"J"LQ"OP!9!99 !I%););I)FF#v'9'9)'DD!Q	 &););I)FF &nnY7DG6#;#;;K%nn[9G3:g7R#S!**:%!':f+=>%gv6G'	0 7==!S%55gmm4!$U+ ++E2GLW+3Z La  Q	
 	J & Ichhtn%56D"Gs   K! L! L3L;:L;c                        e Zd Zdeddf fdZ	 ddeej                     deej                     dej                  fdZ xZ	S )	FunnelEmbeddingsrj   returnNc                 @   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _        y )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idr#   	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfrj   	__class__s     r=   r   zFunnelEmbeddings.__init__   sh    !||F,=,=v?Q?Q_e_r_rs,,v~~6;P;PQzz&"7"78    	input_idsinputs_embedsc                 p    || j                  |      }| j                  |      }| j                  |      }|S N)r#   r   r   )r   r   r   r$   s       r=   forwardzFunnelEmbeddings.forward   s<       00;M__]3
\\*-
r   NN)
__name__
__module____qualname__r   r   r   rf   Tensorr   __classcell__r   s   @r=   r|   r|      sS    9| 9 9 ae!%,,/GOPUP\P\G]	r   r|   c                       e Zd ZU dZdZeed<   deddf fdZ	 	 d de	j                  d	ee	j                     d
ee	j                     dee	j                     fdZd
e	j                  de	j                  fdZdede	j                  de	j                   deee	j                     eee	j                        f   fdZde	j                  defdZd!de	j                  dedede	j                  fdZdee	j                  ee	j                     ee	j                     f   deeee   ee   f   de	j                  fdZ	 d"dee	j                  ee	j                     ee	j                     f   dedede	j                  fdZdee	j                     dee	j                  ee	j                     f   fdZdee	j                     dee	j                     fdZ xZS )#FunnelAttentionStructurez>
    Contains helpers for `FunnelRelMultiheadAttention `.
       cls_token_type_idrj   r}   Nc                     t         |           || _        t        j                  |j
                        | _        t        j                  |j
                        | _        d | _        y r   )	r   r   rj   r   r   r   sin_dropoutcos_dropoutpooling_multr   s     r=   r   z!FunnelAttentionStructure.__init__   sM    ::f&;&;<::f&;&;< !r   r   attention_masktoken_type_idsc                 b   d| _         |j                  d      x| _        }| j                  ||j                  |j
                        }|| j                  |      nd}| j                  j                  r7t        j                  j                  |j                  |dz
  |dz
  g      d      nd}||||fS )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )r   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matrj   separate_clsr   
functionalpadnew_ones)r   r   r   r   r   position_embedstoken_type_matcls_masks           r=   init_attention_inputsz.FunnelAttentionStructure.init_attention_inputs   s     !.!3!3A!66w227M<O<OQ^QeQefGUGa33NCgk {{'' MMm44gk7Q;5OPR^_ 	
  JJr   c                     |dddddf   |dddf   k(  }|| j                   k(  }|dddddf   |dddf   z  }||z  S )z-Convert `token_type_ids` to `token_type_mat`.N)r   )r   r   r   cls_idscls_mats        r=   r   z.FunnelAttentionStructure.token_type_ids_to_mat   sY    '1d
3~ag7NN D$:$::!Q*%4(88''r   r   r   r   c                 z   | j                   j                  }| j                   j                  dk(  rEt        j                  d|dt        j
                  |      j                  |      }t        j                  d|dz  dt        j
                  |      j                  |      }dd||dz  z  z  z  }|dddf   |d   z  }t        j                  |      }	| j                  |	      }
t        j                  |      }| j                  |      }t        j                  |
|
gd	
      }t        j                  ||	gd	
      }t        j                  ||gd	
      }t        j                  |	 |gd	
      }||||fS t        j                  d|dz  dt        j
                  |      j                  |      }dd||dz  z  z  z  }t        j                  | dz  |dz  dt        j
                  |      j                  |      }|dz  }|dddf   |d   z  }| j                  t        j                  |            }	| j                  t        j                  |            }t        j                  |	|gd	
      }t        j                  d|t        j
                  |      j                  |      }|}g }t        d| j                   j                        D ]  }|dk(  rd}ns| j                  ||      }d|dz
  z  }| j                  |||d      }|dddf   |z   }|j!                  |j#                  d      |      }t        j$                  |d|      }|}d|z  }| j                  ||      }|dddf   |z   }|j!                  |j#                  d      |      }t        j$                  |d|      }|j'                  ||g        |S )a  
        Create and cache inputs related to relative position encoding. Those are very different depending on whether we
        are using the factorized or the relative shift attention:

        For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
        final formula.

        For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
        formula.

        Paper link: https://huggingface.co/papers/2006.03236
        
factorizedr         ?r   r   r   r   i'  Ndim)shift)rj   r   attention_typerf   arangeint64tosinr   cosr   catrange
num_blocksstride_pool_posrelative_posexpandr   gatherrN   )r   r   r   r   r   pos_seqfreq_seqinv_freqsinusoid	sin_embedsin_embed_d	cos_embedcos_embed_dphipsipiomega
rel_pos_idzero_offset	pos_embedpos
pooled_posposition_embeds_listblock_indexposition_embeds_poolingstriderel_posposition_embeds_no_poolings                               r=   r   z,FunnelAttentionStructure.get_position_embeds   s|    ++%%;;%%5 ll1gs%++fUXXY^_G||Aw!|STZ[^^_deHEh'Q,&?@AHq$w'(4.8H		(+I**95K		(+I**95K))[+6B?C))Y	2;CK52>BII	z952>ES%(( ||Aw!|STZ[^^_deHEh'Q,&?@AHwhlGaKEKK`fgjjkpqJ!A+K!!T'*Xd^;H((8)<=I((8)<=I		9i"8bAI,,q'VLOOPUVCJ#% $Q(>(>? c !#.2+!%!5!5c;!GJ ;?3F"//VZq/QG%ag.<G%nnW\\!_gFG.3ll9a.Q+ !K++C8!!T'*[8!..a'B-2\\)Q-P*$++-GI`,ab9c: ('r   pos_idr   c                     | j                   j                  rW|j                  d|z   dz   g      }| j                   j                  r|dd n|dd }t	        j
                  ||ddd   gd      S |ddd   S )ze
        Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
        r   r   r   Nr   )rj   r   
new_tensortruncate_seqrf   r   )r   r   r   cls_pospooled_pos_ids        r=   r   z(FunnelAttentionStructure.stride_pool_pos  s     ;;##
 ''1k>):Q)>(?@G,0KK,D,DF1RL&QRQS*M99g}SqS'9:A>>#A#;r   r   r   r   c                     ||}|d   |d   z
  }|t        |      z  }|||z  z   }|d   |d   z
  }t        j                  ||dz
  | t        j                  |j                        S )zV
        Build the relative positional vector between `pos` and `pooled_pos`.
        r   r   r   r   )rc   rf   r   longr   )	r   r   r   r   r   	ref_point
num_removemax_distmin_dists	            r=   r   z%FunnelAttentionStructure.relative_pos$  sx     JqMCF*	S_,
zF22a=3r7*||HhlVG5::VYV`V`aar   tensoraxisc                 H    |yt        t        t        f      rD ]  } j                  ||      } |S t        |t        t        f      r t	        |       fd|D              S |j
                  z   j                  j                  r# j                  j                  rt        ddd      nt        ddd      }t        d      gz  |gz   } j                  j                  r9t        d      gz  t        dd      gz   }t        j                  ||   |g      }||   S )zT
        Perform pooling by stride slicing the tensor along the given axis.
        Nc              3   B   K   | ]  }j                  |        y wr   )stride_pool)r;   xr   r   s     r=   r>   z7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>E  s     Ja 0 0D 9Js   r   r   r   )r   )rS   listtupler   typendimrj   r   r   slicerf   r   )r   r   r   ax
axis_slice	enc_slice	cls_slices   ` `    r=   r   z$FunnelAttentionStructure.stride_pool2  s    > dT5M* 6))&"56M fudm,4<J6JJJ 	 #'++":":t{{?W?WE$A]bcgimop]q 	 4[MD(J<7	;;##t,dA/??IYYy 16:FFi  r   modec                     yt        t        t        f      r t               fdD              S  j                  j
                  rE j                  j                  rddddf   n}t        j                  ddddf   |gd      j                  }|dk(  rddddddf   n|dk(  rdddddddf   dfdk(  r$t        j                  j                  d	
      n_dk(  r$t        j                  j                  d	
      n6dk(  r&t        j                  j                   d	
       nt        d      |dk(  rddddddf   S |dk(  r	dddf   S S )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc              3   F   K   | ]  }j                           yw))r  r   N)pool_tensor)r;   r   r  r   r   r   s     r=   r>   z7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>\  s$     cWX 0 0d6 0 Rcs   !r   r   r   r   r
   meanT)r   	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )rS   r   r   r   rj   r   r   rf   r   r   r   r   
avg_pool2d
max_pool2dNotImplementedError)r   r   r  r   suffixr   s   ````  r=   r  z$FunnelAttentionStructure.pool_tensorS  s{    > fudm,4<c\bccc;;##'+{{'?'?VAssF^VFYYq"1"uv6A>F{{19AtQ,-FQYAtQM*F!6>]]--ffVW[-\FU]]]--ffVW[-\FU]mm..wvY].^^F%&XYY19!Q1*%%QY!Q$<r   attention_inputsc                    |\  }}}}| j                   j                  r| j                   j                  dk(  r| j                  |dd d      |dd z   }| j                  |d      }| j                  |d      }| j	                  || j                   j
                        }n| xj                  dz  c_        | j                   j                  dk(  r| j                  |d      }| j                  |ddg      }| j                  |ddg      }| j	                  |d      }| j	                  || j                   j
                        }||||f}||fS )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.r   Nr   r   r   r  r	  )rj   pool_q_onlyr   r   r  pooling_typer   )r   outputr  r   r   r   r   s          r=   pre_attention_poolingz.FunnelAttentionStructure.pre_attention_poolingy  sM    EUA;;""{{))\9"&"2"2?2A3F"J_]^]_M`"`!--na@N''!4H%%f4;;3K3K%LF"{{))\9"&"2"2?A"F!--nq!fEN''1a&9H!--n5-IN%%f4;;3K3K%LF+^^XV'''r   c                 L   |\  }}}}| j                   j                  r| xj                  dz  c_        | j                   j                  dk(  r|dd | j	                  |dd d      z   }| j	                  |d      }| j	                  |d      }| j                  |d      }||||f}|S )zFPool the proper parts of `attention_inputs` after the attention layer.r   r   Nr   r   r	  r  )rj   r  r   r   r   r  )r   r  r   r   r   r   s         r=   post_attention_poolingz/FunnelAttentionStructure.post_attention_pooling  s    DTA;;"""{{))\9"1"1"58H8HYZY[I\^_8`"`!--na@N''!4H!--n5-IN+^^XVr   r   Nr   )r  r   )r   r   r   __doc__r   rV   __annotations__r   r   rf   r   r   r   r   r   r   r   r   r   r   r   r   r   strr  r  r  r   r   s   @r=   r   r      sC    s!| ! ! 2615	K||K !.K !.	K
 
u||	K((ELL (U\\ (N(N(#(;;N(8=N(	uU\\"Dell);$<<	=N(`ell  b bc bSV b_d_k_k b!ellE%,,$7ell9KKL! CsT#Y./! 
	!D wx$ELL%*=tELL?QQR$Z]$ps$	$L((-ell(;(	u||U5<<00	1(, uU\\7J  uUZUaUaOb  r   r   positional_attncontext_lenr   r}   c                     | j                   \  }}}}t        j                  | ||||g      } | d d d d |d d d f   } t        j                  | |||||z
  g      } | dd |f   } | S )N.)rb   rf   rd   )r  r  r   
batch_sizen_headr   max_rel_lens          r=   _relative_shift_gatherr!    s    />/D/D,J mmOj&+W^5_`O%aEFAo6OmmOj&'S^afSf5ghO%c<K<&78Or   c                        e Zd Zdededdf fdZddZddZ	 ddej                  d	ej                  d
ej                  de
ej                     dede
ej                  df   fdZ xZS )r]   rj   r   r}   Nc                 J   t         |           || _        || _        |j                  |j
                  |j                  }}}t        j                  |j                        | _	        t        j                  |j                        | _
        t        j                  |||z  d      | _        t        j                  |||z        | _        t        j                  |||z        | _        t        j                  t!        j"                  ||g            | _        t        j                  t!        j"                  ||g            | _        t        j                  t!        j"                  |||g            | _        t        j                  t!        j"                  ||g            | _        t        j                  t!        j"                  d||g            | _        t        j                  ||z  |      | _        t        j0                  ||j2                        | _        d|dz  z  | _        y )NF)r"   r   r   r   g      ?)r   r   rj   r   r   r  d_headr   r   r   attention_dropoutLinearr   r   r   	Parameterrf   zerosr_w_biasr_r_biasr^   r_s_bias	seg_embedr   r   r   r   scale)r   rj   r   r   r  r$  r   s         r=   r   z$FunnelRelMultiheadAttention.__init__  s   &"(..&-- jj)>)>?!#F,D,D!Eii&uEii&9ii&9U[[&&1A%BCU[[&&1A%BCU[['661J%KLU[[&&1A%BCekk1ff2E&FG6F?G<,,wF4I4IJFCK(
r   c                 ~   | j                   j                  dk(  r|\  }}}}| j                  | j                  z  }	| j                  }
t        j                  d||	z   |
      }||dddf   z  }||dddf   z  }t        j                  d||      t        j                  d||      z   }n|j                  d   |k7  rdnd}|| j                     |dz
     }| j                  | j                  z  }| j                  }
t        j                  d||
      }t        j                  d||z   |      }t        |||      }|||z  }|S )	z5Relative attention score for the positional encodingsr   zbinh,dnh->bindNzbind,jd->bnijr   r   ztd,dnh->tnhzbinh,tnh->bnit)
rj   r   r*  r-  r^   rf   einsumrb   r   r!  )r   r   r   r  r   r   r   r   r   uw_rq_r_attentionq_r_attention_1q_r_attention_2r  r   r@   r'   r_heads                      r=   relative_positional_attentionz9FunnelRelMultiheadAttention.relative_positional_attention  sK    ;;%%5 #2CS%

*A--C "LL)96A:sKM+c!T'l:O+bDk9O $ll?OSQTYT`T`%U O  aK7AQE   0 01%!)<A

*A--C \\-C8F#ll+;VaZPO4_kSXYOx'Or   c                    |y|j                   \  }}}| j                  | j                  z  }t        j                  d||z   | j
                        }|dddf   j                  ||j                   d   ||g      }t        j                  |dd      \  }	}
t        j                  ||
j                  |j                         |	j                  |j                               }|||z  }|S )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisr   r   r   r   )	rb   r+  r-  rf   r/  r,  r   rP   where)r   r   r   r   r  r   r  r+  token_type_biasdiff_token_typesame_token_typetoken_type_attns               r=   relative_token_type_attentionz9FunnelRelMultiheadAttention.relative_token_type_attention  s    !+9+?+?(
G[ ==4::-  ,,'7(9JDNN['4077V\\RS_V]_j8kl+0;;r+R(++O22>3G3GH/J`J`aoauauJv
 x'Or   querykeyvaluer  output_attentions.c                    |\  }}}}	|j                   \  }
}}|j                   d   }| j                  j                  | j                  j                  }}| j	                  |      j                  |
|||      }| j                  |      j                  |
|||      }| j                  |      j                  |
|||      }|| j                  z  }| j                  | j                  z  }t        j                  d||z   |      }| j                  ||||	      }| j                  |||	      }||z   |z   }|j                  }|j                         }|%|t         d|d d d d f   j                         z
  z  z
  }t        j"                  |d|      }| j%                  |      }t        j                  d||      }| j'                  |j)                  |
|||z              }| j+                  |      }| j-                  ||z         }|r||fS |fS )Nr   zbind,bjnd->bnijr   )r   r   zbnij,bjnd->bind)rb   rj   r  r$  r   viewr   r   r-  r)  rf   r/  r6  r=  r   floatINFsoftmaxr%  r   rd   r   r   )r   r>  r?  r@  r  rA  r   r   r   r   r  r   _r  r  r$  r   r   r   r)  content_scorer  r<  
attn_scorer   	attn_probattn_vecattn_outr  s                                r=   r   z#FunnelRelMultiheadAttention.forward  s    EUA!&
GQiil++T[[-?-? U#((WffMS!&&z;OU#(([&&Q$**$==4::-%68I6R<<_fVackl<<^VU]^ #_4F
   %%'
%#cQ41N1T1T1V-V&WWJMM*"EB	**95	 << 19fE >>("2"2:wQW"XY&&x0!12&7	"FfYFr   r   F)r   r   r   r   rV   r   r6  r=  rf   r   r   boolr   r   r   s   @r=   r]   r]     s    )| )# )$ ).(T< #(3G||3G \\3G ||	3G
  -3G  3G 
u||S 	!3Gr   r]   c                   `     e Zd Zdeddf fdZdej                  dej                  fdZ xZS )rT   rj   r}   Nc                    t         |           t        j                  |j                  |j
                        | _        t        |j                     | _	        t        j                  |j                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t        j                  |j                  |j                         | _        y r   )r   r   r   r&  r   d_innerr   r   
hidden_actactivation_functionr   activation_dropoutr   r   r   r   r   r   r   s     r=   r   zFunnelPositionwiseFFN.__init__>  s    		&..&..A#)&*;*;#< "$**V-F-F"G		&..&..Azz&"7"78,,v~~v7L7LMr   hiddenc                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  ||z         S r   )r   rS  rT  r   r   r   )r   rU  hs      r=   r   zFunnelPositionwiseFFN.forwardG  s^    MM&!$$Q'##A&MM!LLOvz**r   )	r   r   r   r   r   rf   r   r   r   r   s   @r=   rT   rT   =  s4    N| N N+ell +u|| +r   rT   c                        e Zd Zdededdf fdZ	 ddej                  dej                  dej                  d	ede	f
d
Z
 xZS )FunnelLayerrj   r   r}   Nc                 d    t         |           t        ||      | _        t	        |      | _        y r   )r   r   r]   r   rT   r    )r   rj   r   r   s      r=   r   zFunnelLayer.__init__Q  s(    4V[I(0r   r>  r?  r@  rA  c                 n    | j                  |||||      }| j                  |d         }|r||d   fS |fS )NrA  r   r   )r   r    )r   r>  r?  r@  r  rA  attnr  s           r=   r   zFunnelLayer.forwardV  sH     ~~eS%1AUf~g$q'"$5Q DF9Dr   rM  )r   r   r   r   rV   r   rf   r   rN  r   r   r   r   s   @r=   rY  rY  P  si    1| 1# 1$ 1 #(
E||
E \\
E ||	
E  
E 

Er   rY  c                        e Zd Zdeddf fdZ	 	 	 	 	 ddej                  deej                     deej                     ded	ed
ede	e
ef   fdZ xZS )FunnelEncoderrj   r}   Nc                 T   t         |           || _        t        |      | _        t        j                  t        |j                        D cg c];  \  }}t        j                  t        |      D cg c]  }t        ||       c}      = c}}}      | _        y c c}w c c}}}w r   )r   r   rj   r   attention_structurer   
ModuleList	enumeraterZ   r   rY  r[   )r   rj   r   
block_sizerG  r   s        r=   r   zFunnelEncoder.__init__d  s    #;F#C mm 099K9K/L +K zIZ[A{6;?[\
[s   $B#3B	B#B#r   r   r   rA  output_hidden_statesreturn_dictc           
         |j                  |      }| j                  j                  |||      }|}|r|fnd }	|rdnd }
t        | j                        D ]  \  }}|j                  d      | j                  j                  rdndkD  }|xr |dkD  }|r| j                  j                  ||      \  }}t        |      D ]  \  }}t        | j                  j                  |         D ]{  }|dk(  xr	 |dk(  xr |}|r}| j                  j                  r|n|x}}n|x}x}} ||||||      }|d   }|r| j                  j                  |      }|r|
|dd  z   }
|sv|	|fz   }	}   |st        d ||	|
fD              S t        ||	|
      S )	Nr   r   r:   r   r   r   r\  c              3   &   K   | ]	  }||  y wr   r:   r;   r'   s     r=   r>   z(FunnelEncoder.forward.<locals>.<genexpr>       aqSTS`a   last_hidden_statehidden_states
attentions)type_asra  r   rc  r[   r   rj   r   r  r   block_repeatsr  r  r   r   )r   r   r   r   rA  re  rf  r  rU  all_hidden_statesall_attentionsr   blockpooling_flagpooled_hiddenrx   layerrepeat_index
do_poolingr>  r?  r@  layer_outputs                          r=   r   zFunnelEncoder.forwardo  s    (//>33II)) J 

 0D],$0d"+DKK"8 	JK!;;q>$++2J2JQPQRL';K!OL262J2J2`2`,3// '0&6 J"U$)$++*C*CK*P$Q JL".!"3!\+:J!\P\J! -040G0Gf]Ze.444e#(U<L`q#rL)!_F!+/+C+C+Z+Z[k+l(()7,qr:J)J+,=	,I)JJ	J2 aV->$OaaaGXesttr   NNFFTr   r   r   r   r   rf   r   r   rN  r   r   r   r   r   r   s   @r=   r_  r_  c  s    	
| 	
 	
 2615"'%* 0u||0u !.0u !.	0u
  0u #0u 0u 
uo%	&0ur   r_  r   r   
target_lenr   r   c           	      6   |dk(  r| S |r| ddddf   }| ddddf   } t        j                  | |d      }|rT|r)t        j                  j	                  |ddd|dz
  ddf      }|ddd|dz
  f   }t        j
                  |gd      }|S |ddd|f   }|S )z{
    Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
    r   N)repeatsr   r   r   )rf   repeat_interleaver   r   r   r   )r   r   r~  r   r   clsr  s          r=   upsampler    s     {2A2haeH$$QA>F]]&&v1a!Q/JKF+Z!^++,C=a0 M ;J;'Mr   c                        e Zd Zdeddf fdZ	 	 	 	 	 ddej                  dej                  deej                     deej                     d	ed
edede	e
ef   fdZ xZS )FunnelDecoderrj   r}   Nc           	          t         |           || _        t        |      | _        t        j                  t        |j                        D cg c]  }t        |d       c}      | _
        y c c}w )Nr   )r   r   rj   r   ra  r   rb  r   num_decoder_layersrY  r\   )r   rj   rG  r   s      r=   r   zFunnelDecoder.__init__  sR    #;F#C mmU6KdKdEe$f[%;$fg$fs   A-final_hiddenfirst_block_hiddenr   r   rA  re  rf  c                    t        |dt        | j                  j                        dz
  z  |j                  d   | j                  j
                  | j                  j                        }||z   }	|r|	fnd }
|rdnd }| j                  j                  |	||      }| j                  D ]'  } ||	|	|	||      }|d   }	|r||dd  z   }|s"|
|	fz   }
) |st        d |	|
|fD              S t        |	|
|	      S )
Nr   r   )r   r~  r   r   r:   rh  r\  r   c              3   &   K   | ]	  }||  y wr   r:   rj  s     r=   r>   z(FunnelDecoder.forward.<locals>.<genexpr>  rk  rl  rm  )r  rc   rj   rZ   rb   r   r   ra  r   r\   r   r   )r   r  r  r   r   rA  re  rf  upsampled_hiddenrU  rs  rt  r  rx  r{  s                  r=   r   zFunnelDecoder.forward  s%    $T[[4459:)//21111
 "$66)=VI40d33II)) J 
 [[ 	BE 9I]noL!!_F !/,qr2B!B#$5	$A!	B aV->$OaaaGXesttr   r|  r}  r   s   @r=   r  r    s    h| h h 2615"'%* 'ull'u "LL'u !.	'u
 !.'u  'u #'u 'u 
uo%	&'ur   r  c                   d     e Zd ZdZdeddf fdZdej                  dej                  fdZ xZ	S )FunnelDiscriminatorPredictionszEPrediction module for the discriminator, made up of two dense layers.rj   r}   Nc                     t         |           || _        t        j                  |j
                  |j
                        | _        t        j                  |j
                  d      | _        y r  )r   r   rj   r   r&  r   densedense_predictionr   s     r=   r   z'FunnelDiscriminatorPredictions.__init__  sF    YYv~~v~~>
 "		&..! <r   discriminator_hidden_statesc                     | j                  |      }t        | j                  j                     |      }| j	                  |      j                  d      }|S )Nr   )r  r   rj   rR  r  squeeze)r   r  ro  logitss       r=   r   z&FunnelDiscriminatorPredictions.forward  sJ    

#>?t{{556}E&&}5==bAr   )
r   r   r   r  r   r   rf   r   r   r   r   s   @r=   r  r    s4    O=| = =5<< ELL r   r  c                   &    e Zd ZU eed<   eZdZd Zy)FunnelPreTrainedModelrj   funnelc                    |j                   j                  }|j                  d      dk7  rt        |dd       | j                  j
                  >|j                  j                  \  }}t        j                  dt        ||z         z        }n| j                  j
                  }t        j                  j                  |j                  |       t        |dd       +t        j                  j                  |j                  d       y y |dk(  r<t        j                  j!                  |j"                  | j                  j$                  	       t        j                  j!                  |j&                  | j                  j$                  	       t        j                  j!                  |j(                  | j                  j$                  	       t        j                  j!                  |j*                  | j                  j$                  	       t        j                  j!                  |j,                  | j                  j$                  	       y |d
k(  r| j                  j
                  dn| j                  j
                  }t        j                  j                  |j.                  j                  |       |j.                  j0                  F|j.                  j                  j2                  |j.                  j0                     j5                          y y y )Nr&  r   r!   r   )stdr"   g        r]   )br|   )r   r   findr_   rj   initializer_stdr!   rb   rl   sqrtrD  r   initnormal_	constant_r"   uniform_r)  initializer_ranger*  r^   r+  r,  r#   r   rh   zero_)r   module	classnamefan_outfan_inr  s         r=   _init_weightsz#FunnelPreTrainedModel._init_weights  s   $$--	>>(#r)vx.:;;..6&,mm&9&9OGV''#fw.>(?"?@C++55C37vvt,8!!&++s3 977GGV__0M0MNGGV__0M0MNGGV__0M0MNGGV__0M0MNGGV--1N1NO,,44<#$++B]B]CGGOOF2299sOC%%11=&&--2263I3I3U3UV\\^ > -r   N)	r   r   r   r   r  rz   load_tf_weightsbase_model_prefixr  r:   r   r=   r  r    s    /O _r   r  c                   d     e Zd Zdededdf fdZdej                  dej                  fdZ xZ	S )FunnelClassificationHeadrj   n_labelsr}   Nc                    t         |           t        j                  |j                  |j                        | _        t        j                  |j                        | _        t        j                  |j                  |      | _	        y r   )
r   r   r   r&  r   linear_hiddenr   r   r   
linear_out)r   rj   r  r   s      r=   r   z!FunnelClassificationHead.__init__  sU    YYv~~v~~Fzz&"7"78))FNNH=r   rU  c                     | j                  |      }t        j                  |      }| j                  |      }| j	                  |      S r   )r  rf   tanhr   r  )r   rU  s     r=   r   z FunnelClassificationHead.forward  s=    ##F+F#f%v&&r   )
r   r   r   r   rV   r   rf   r   r   r   r   s   @r=   r  r    s8    >| >s >t >'ell 'u|| 'r   r  z2
    Output type of [`FunnelForPreTraining`].
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eeej                        ed<   dZeeej                        ed<   y)FunnelForPreTrainingOutputa1  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss of the ELECTRA-style objective.
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
        Prediction scores of the head (scores for each token before SoftMax).
    Nlossr  ro  rp  )r   r   r   r  r  r   rf   FloatTensorr  r  ro  r   rp  r:   r   r=   r  r  &  sg     )-D(5$$
%,*.FHU&&'.8<M8E%"3"345<59Ju00129r   r  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    c                   r    e Zd Zdeddf fdZdej                  fdZdej                  ddfdZe		 	 	 	 	 	 	 	 	 dde
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeef   fd       Z xZS )FunnelBaseModelrj   r}   Nc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )r   r   r|   r$   r_  encoder	post_initr   s     r=   r   zFunnelBaseModel.__init__A  s4     *62$V, 	r   c                 .    | j                   j                  S r   r$   r#   r   s    r=   get_input_embeddingsz$FunnelBaseModel.get_input_embeddingsJ      ...r   new_embeddingsc                 &    || j                   _        y r   r  r   r  s     r=   set_input_embeddingsz$FunnelBaseModel.set_input_embeddingsM      *8'r   r   r   r   position_ids	head_maskr   rA  re  rf  c
                 V   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         d d }
nt	        d      ||j                  n|j                  }|t        j                  |
|      }|&t        j                  |
t        j                  |      }| j                  ||      }| j                  ||||||	      }|S )NDYou cannot specify both input_ids and inputs_embeds at the same timer   5You have to specify either input_ids or inputs_embedsr   r   r   r   r   rA  re  rf  )rj   rA  re  use_return_dict
ValueError%warn_if_padding_and_no_attention_maskr   r   rf   onesr(  r   r$   r  )r   r   r   r   r  r  r   rA  re  rf  input_shaper   encoder_outputss                r=   r   zFunnelBaseModel.forwardP  s=    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN 	O,,))/!5# ' 
 r   	NNNNNNNNNr   r   r   r   r   r   r   r  r  r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r  r  :  s   |  /bll /92<< 9D 9  -11515/3,004,0/3&*/ELL)/ !./ !.	/
 u||,/ ELL)/  -/ $D>/ 'tn/ d^/ 
uo%	&/ /r   r  c                   2    e Zd Zdeddf fdZdej                  fdZdej                  ddfdZe		 	 	 	 	 	 	 dde
ej                     d	e
ej                     d
e
ej                     de
ej                     de
e   de
e   de
e   deeef   fd       Z xZS )FunnelModelrj   r}   Nc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        | j                          y r   )
r   r   rj   r|   r$   r_  r  r  decoderr  r   s     r=   r   zFunnelModel.__init__  sG     *62$V,$V, 	r   c                 .    | j                   j                  S r   r  r  s    r=   r  z FunnelModel.get_input_embeddings  r  r   r  c                 &    || j                   _        y r   r  r  s     r=   r  z FunnelModel.set_input_embeddings  r  r   r   r   r   r   rA  re  rf  c           	         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }||t	        d      |#| j                  ||       |j                         }n!||j                         d d }nt	        d      ||j                  n|j                  }	|t        j                  ||	      }|&t        j                  |t        j                  |	      }| j                  ||      }| j                  ||||d|      }
| j                  |
d	   |
d
   | j                   j                  d	      |||||      }|s6d	}|d	   f}|r|d
z  }||
d
   ||   z   fz   }|r|d
z  }||
d   ||   z   fz   }|S t!        |d	   |r|
j"                  |j"                  z   nd |r|
j$                  |j$                  z         S d       S )Nr  r   r  r  r   r  Tr  r   r   )r  r  r   r   rA  re  rf  r   rm  )rj   rA  re  r  r  r  r   r   rf   r  r(  r   r$   r  r  rZ   r   ro  rp  )r   r   r   r   r   rA  re  rf  r  r   r  decoder_outputsidxoutputss                 r=   r   zFunnelModel.forward  sA    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN 	O,,))/!%# ' 
 ,,(+.q1$++2I2I!2LM))/!5# ' 
 C&q)+G#q!_Q%7/#:N%N$PP q!_Q%7/#:N%N$PPN-a0# +88?;X;XXTe22_5O5OO
 	

 lp
 	
r   )NNNNNNNr  r   s   @r=   r  r    s    |  /bll /92<< 9D 9  -1151504,0/3&*H
ELL)H
 !.H
 !.	H

  -H
 $D>H
 'tnH
 d^H
 
uo%	&H
 H
r   r  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   de
eef   fd       Z xZS )FunnelForPreTrainingrj   r}   Nc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )r   r   r  r  r  discriminator_predictionsr  r   s     r=   r   zFunnelForPreTraining.__init__  s3     !&))G)O&r   r   r   r   r   labelsrA  re  rf  c	           	      `   ||n| j                   j                  }| j                  |||||||      }	|	d   }
| j                  |
      }d}|t	        j
                         }|a|j                  d|
j                  d         dk(  }|j                  d|
j                  d         |   }||   } |||j                               }n4 ||j                  d|
j                  d         |j                               }|s|f|	dd z   }||f|z   S |S t        |||	j                  |	j                        S )a"  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
            docstring) Indices should be in `[0, 1]`:

            - 0 indicates the token is an original token,
            - 1 indicates the token was replaced.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, FunnelForPreTraining
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
        >>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> logits = model(**inputs).logits
        ```Nr   r   r   rA  re  rf  r   r   r   r  r  ro  rp  )rj   r  r  r  r   r   rC  rb   rD  r  ro  rp  )r   r   r   r   r   r  rA  re  rf  r  discriminator_sequence_outputr  r  loss_fctactive_lossactive_logitsactive_labelsr  s                     r=   r   zFunnelForPreTraining.forward  sj   @ &1%<k$++B]B]&*kk))'/!5# '2 '
# )DA(F%//0MN++-H),11"6S6Y6YZ[6\]abb &B0M0S0STU0V WXc d &{ 3}/B/B/DEB0M0S0STU0V WY_YeYeYghY!<QR!@@F)-)9TGf$EvE)5CC2==	
 	
r   NNNNNNNN)r   r   r   r   r   r   r   rf   r   rN  r   r   r  r   r   r   s   @r=   r  r    s    |    -1151504)-,0/3&*B
ELL)B
 !.B
 !.	B

  -B
 &B
 $D>B
 'tnB
 d^B
 
u00	1B
 B
r   r  c                   X    e Zd ZdgZdeddf fdZdej                  fdZdej                  ddfdZ
e	 	 	 	 	 	 	 	 dd	eej                     d
eej                     deej                     deej                     deej                     dee   dee   dee   deeef   fd       Z xZS )FunnelForMaskedLMzlm_head.weightrj   r}   Nc                     t         |   |       t        |      | _        t	        j
                  |j                  |j                        | _        | j                          y r   )
r   r   r  r  r   r&  r   r   lm_headr  r   s     r=   r   zFunnelForMaskedLM.__init__:  sD     !&)yy1B1BC 	r   c                     | j                   S r   r  r  s    r=   get_output_embeddingsz'FunnelForMaskedLM.get_output_embeddingsC  s    ||r   r  c                     || _         y r   r  r  s     r=   set_output_embeddingsz'FunnelForMaskedLM.set_output_embeddingsF  s	    %r   r   r   r   r   r  rA  re  rf  c	           	         ||n| j                   j                  }| j                  |||||||      }	|	d   }
| j                  |
      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|s|f|	dd z   }||f|z   S |S t        |||	j                  |	j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr  r   r   r   r  )
rj   r  r  r  r   rC  r   r   ro  rp  )r   r   r   r   r   r  rA  re  rf  r  rn  prediction_logitsmasked_lm_lossr  r  s                  r=   r   zFunnelForMaskedLM.forwardI  s    $ &1%<k$++B]B]++))'/!5#  
 $AJ LL):;')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r   r  )r   r   r   _tied_weights_keysr   r   r   r&  r  r   r  r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r  r  6  s   *+|  ryy &BLL &T &  -1151504)-,0/3&*.
ELL).
 !..
 !.	.

  -.
 &.
 $D>.
 'tn.
 d^.
 
un$	%.
 .
r   r  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   de
eef   fd       Z xZS )FunnelForSequenceClassificationrj   r}   Nc                     t         |   |       |j                  | _        || _        t	        |      | _        t        ||j                        | _        | j                          y r   )	r   r   
num_labelsrj   r  r  r  
classifierr  r   s     r=   r   z(FunnelForSequenceClassification.__init__  sN      ++%f-266;L;LMr   r   r   r   r   r  rA  re  rf  c	           	      ,   ||n| j                   j                  }| j                  |||||||      }	|	d   }
|
dddf   }| j                  |      }d}|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }| j
                  dk(  r& ||j                         |j                               }n |||      }n| j                   j                  dk(  r=t               } ||j                  d| j
                        |j                  d            }n,| j                   j                  dk(  rt               } |||      }|s|f|	dd z   }||f|z   S |S t        |||	j                   |	j"                  	      S )
a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr   r  )rj   r  r  r  problem_typer   r   rf   r   rV   r	   r  r   rC  r   r   ro  rp  )r   r   r   r   r   r  rA  re  rf  r  rn  pooled_outputr  r  r  r  s                   r=   r   z'FunnelForSequenceClassification.forward  s   $ &1%<k$++B]B]++))'/!5#  
 $AJ)!Q$//{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r   r  )r   r   r   r   r   r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r  r  {  s    |    -1151504)-,0/3&*A
ELL)A
 !.A
 !.	A

  -A
 &A
 $D>A
 'tnA
 d^A
 
u..	/A
 A
r   r  c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   de
eef   fd       Z xZS )FunnelForMultipleChoicerj   r}   Nc                     t         |   |       t        |      | _        t	        |d      | _        | j                          y r  )r   r   r  r  r  r  r  r   s     r=   r   z FunnelForMultipleChoice.__init__  s4     %f-261=r   r   r   r   r   r  rA  re  rf  c	           	         ||n| j                   j                  }||j                  d   n|j                  d   }	|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  |||||||      }
|
d   }|dddf   }| j                  |      }|j                  d|	      }d}|t               } |||      }|s|f|
dd z   }||f|z   S |S t        |||
j                  |
j                        S )aJ  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   r  r   r  )rj   r  rb   rC  r   r  r  r   r   ro  rp  )r   r   r   r   r   r  rA  re  rf  num_choicesr  rn  r  r  reshaped_logitsr  r  r  s                     r=   r   zFunnelForMultipleChoice.forward  s   $ &1%<k$++B]B],5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImq ( r=#5#5b#9=;M;Mb;QR 	 ++))'/!5#  
 $AJ)!Q$// ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r   r  )r   r   r   r   r   r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r	  r	    s    |    -1151504)-,0/3&*:
ELL):
 !.:
 !.	:

  -:
 &:
 $D>:
 'tn:
 d^:
 
u//	0:
 :
r   r	  c                       e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   de
eef   fd       Z xZS )FunnelForTokenClassificationrj   r}   Nc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r   )r   r   r   r  r  r   r   r   r   r&  r   r  r  r   s     r=   r   z%FunnelForTokenClassification.__init__  si      ++!&)zz&"7"78))F$6$68I8IJ 	r   r   r   r   r   r  rA  re  rf  c	           	         ||n| j                   j                  }| j                  |||||||      }	|	d   }
| j                  |
      }
| j	                  |
      }d}|<t               } ||j                  d| j                        |j                  d            }|s|f|	dd z   }||f|z   S |S t        |||	j                  |	j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r   r  )rj   r  r  r   r  r   rC  r   r   ro  rp  )r   r   r   r   r   r  rA  re  rf  r  rn  r  r  r  r  s                  r=   r   z$FunnelForTokenClassification.forward&  s      &1%<k$++B]B]++))'/!5#  
 $AJ LL):;!23')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r   r  )r   r   r   r   r   r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r  r    s    	| 	 	  -1151504)-,0/3&*-
ELL)-
 !.-
 !.	-

  --
 &-
 $D>-
 'tn-
 d^-
 
u++	,-
 -
r   r  c                   .    e Zd Zdeddf fdZe	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
eej                     dee	   dee	   dee	   de
eef   fd       Z xZS )FunnelForQuestionAnsweringrj   r}   Nc                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y r   )
r   r   r   r  r  r   r&  r   
qa_outputsr  r   s     r=   r   z#FunnelForQuestionAnswering.__init__Y  sS      ++!&)))F$6$68I8IJ 	r   r   r   r   r   start_positionsend_positionsrA  re  rf  c
           	      $   |	|	n| j                   j                  }	| j                  |||||||	      }
|
d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|	s||f|
dd  z   }||f|z   S |S t        ||||
j                  |
j                        S )	Nr  r   r   r   r   )ignore_indexr   )r  start_logits
end_logitsro  rp  )rj   r  r  r  rP   r  
contiguousrc   r   squezeclampr   r   ro  rp  )r   r   r   r   r   r  r  rA  re  rf  r  rn  r  r  r  
total_lossignored_indexr  
start_lossend_lossr  s                        r=   r   z"FunnelForQuestionAnswering.forwardc  s    &1%<k$++B]B]++))'/!5#  
 $AJ!23#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"8"8"<=%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r   r  )r   r   r   r   r   r   r   rf   r   rN  r   r   r   r   r   r   s   @r=   r  r  W  s    |    -11515042604,0/3&*:
ELL):
 !.:
 !.	:

  -:
 "%,,/:
  -:
 $D>:
 'tn:
 d^:
 
u22	3:
 :
r   r  )
r  r  r	  r  r  r  r  r  r  rz   )TF)?r  rG   dataclassesr   typingr   r   rB   rl   rf   r   torch.nnr   r   r	   activationsr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   utilsr   r   r   configuration_funnelr   
get_loggerr   rE   rE  rz   Moduler|   r   r   rV   r!  r]   rT   rY  r_  rN  r  r  r  r  r  r  r  r  r  r  r  r	  r  r  __all__r:   r   r=   <module>r/     s   ( 	 ! "    A A !  . 9 9 . 
		H	% 
Wtryy "A ryy A HELL s SV [`[g[g  MG")) MG`+BII +&E")) E&<uBII <u@ di|| .1AE\`
\\,.uBII .ubRYY   _O _ _<'ryy ' 
: : : @+ @@F Z
' Z
 Z
z L
0 L
L
^ A
- A
 A
H M
&; M
M
` D
3 D
 D
N :
#8 :
 :
z F
!6 F
 F
Rr   