
    rh{                        d Z ddlmZ ddlmZmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZmZmZ ddlmZmZmZ ddlm Z   ejB                  e"      Z# G d de	jH                        Z%	 	 d;de	jH                  dejL                  dejL                  dejL                  deejL                     de'de'deejL                     fdZ( G d de	jH                        Z) G d de	jH                        Z* G d d e	jH                        Z+ G d! d"e	jH                        Z, G d# d$e	jH                        Z- G d% d&e      Z. G d' d(e	jH                        Z/e G d) d*e             Z0e G d+ d,e0             Z1 G d- d.e	jH                        Z2 G d/ d0e	jH                        Z3e G d1 d2e0             Z4e ed34       G d5 d6e                    Z5 ed74       G d8 d9e0             Z6g d:Z7y)<zPyTorch Splinter model.    )	dataclass)CallableOptionalUnionN)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputModelOutputQuestionAnsweringModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )SplinterConfigc                        e Zd ZdZ fdZ	 	 	 	 d	deej                     deej                     deej                     deej                     de	f
dZ
 xZS )
SplinterEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        | j#                  dt%        j&                  |j                        j)                  d      d       t+        |dd      | _        y )	N)padding_idxepsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandgetattrr"   selfconfig	__class__s     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/splinter/modeling_splinter.pyr%   zSplinterEmbeddings.__init__,   s    !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$    	input_idstoken_type_idsr   inputs_embedsreturnc                    ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|:t        j                  |t        j                  | j                  j
                        }|| j                  |      }| j                  |      }||z   }| j                  dk(  r| j                  |      }	||	z  }| j                  |      }| j                  |      }|S )Nr    r   dtypedevicer#   )sizer   r5   zeroslongrF   r*   r.   r"   r,   r/   r3   )
r:   r?   r@   r   rA   input_shape
seq_lengthr.   
embeddingsr,   s
             r=   forwardzSplinterEmbeddings.forward=   s      #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r>   )NNNN)__name__
__module____qualname____doc__r%   r   r5   
LongTensorFloatTensortuplerM   __classcell__r<   s   @r=   r   r   )   s{    Q^& 15593759E,,- !!1!12 u//0	
   1 12 
r>   r   modulequerykeyvalueattention_maskscalingr3   	head_maskc                 .   t        j                  ||j                  dd            |z  }	|#|d d d d d d d |j                  d   f   }
|	|
z   }	t        j
                  j                  |	dt         j                        j                  |j                        }	t        j
                  j                  |	|| j                        }	||	|j                  dddd      z  }	t        j                  |	|      }|j                  dd      j                         }||	fS )N   r	   r    )dimrE   )ptrainingr   )r5   matmul	transposeshaper   
functionalsoftmaxfloat32torE   r3   rc   view
contiguous)rW   rX   rY   rZ   r[   r\   r3   r]   kwargsattn_weightscausal_maskattn_outputs               r=   eager_attention_forwardrq   _   s     <<s}}Q':;gEL!$Q1o		"o%=>#k1==((2U]](SVVW\WbWbcL==((6??([L#innQAq&AA,,|U3K''1-88:K$$r>   c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	SplinterSelfAttentionc                 $   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |j                   | _        | j                  dz  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r$   r%   r(   num_attention_headshasattr
ValueErrorr;   intattention_head_sizeall_head_sizer   LinearrX   rY   rZ   r1   attention_probs_dropout_probr3   attention_dropoutr\   r9   s     r=   r%   zSplinterSelfAttention.__init__|   sC    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5r>   hidden_statesr[   r]   output_attentionsrB   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
t        }| j                  j                  dk7  rt        | j                  j                     } || ||	|
|f| j                  sdn| j                  | j                  |d|\  }} |j                  g |d j                         }|r||f}|S |f}|S )Nr    r   r_   eager        )r3   r\   r]   )rf   r{   rX   rk   re   rY   rZ   rq   r;   _attn_implementationr   rc   r   r\   reshaperl   )r:   r   r[   r]   r   rm   rJ   hidden_shapequery_states
key_statesvalue_statesattention_interfacerp   rn   outputss                  r=   rM   zSplinterSelfAttention.forward   sa    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
!\ *k));;;;FFH1B;- JUr>   NNF)rN   rO   rP   r%   r5   Tensorr   rS   boolrT   rM   rU   rV   s   @r=   rs   rs   {   so    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	!r>   rs   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )SplinterSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr   )r$   r%   r   r}   r(   denser/   r0   r1   r2   r3   r9   s     r=   r%   zSplinterSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r>   r   input_tensorrB   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r3   r/   r:   r   r   s      r=   rM   zSplinterSelfOutput.forward   7    

=1]3}|'CDr>   rN   rO   rP   r%   r5   r   rM   rU   rV   s   @r=   r   r      1    >U\\  RWR^R^ r>   r   c                        e Zd Z fdZd Z	 	 	 d	dej                  deej                     deej                     dee	   de
ej                     f
dZ xZS )
SplinterAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y r   )r$   r%   rs   r:   r   outputsetpruned_headsr9   s     r=   r%   zSplinterAttention.__init__   s0    )&1	(0Er>   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   ra   )lenr   r:   rw   r{   r   r   rX   rY   rZ   r   r   r|   union)r:   headsindexs      r=   prune_headszSplinterAttention.prune_heads   s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r>   r   r[   r]   r   rB   c                 p     | j                   |f|||d|}| j                  |d   |      }|f|dd  z   }|S N)r[   r]   r   r   r   )r:   r   )	r:   r   r[   r]   r   rm   self_outputsattention_outputr   s	            r=   rM   zSplinterAttention.forward   s_     !tyy
)/	

 
  ;;|AF#%QR(88r>   r   )rN   rO   rP   r%   r   r5   r   r   rS   r   rT   rM   rU   rV   s   @r=   r   r      st    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	r>   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )SplinterIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r$   r%   r   r}   r(   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnr9   s     r=   r%   zSplinterIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r>   r   rB   c                 J    | j                  |      }| j                  |      }|S r   )r   r   )r:   r   s     r=   rM   zSplinterIntermediate.forward   s&    

=100?r>   r   rV   s   @r=   r   r      s#    9U\\ ell r>   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )SplinterOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r$   r%   r   r}   r   r(   r   r/   r0   r1   r2   r3   r9   s     r=   r%   zSplinterOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r>   r   r   rB   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r=   rM   zSplinterOutput.forward
  r   r>   r   rV   s   @r=   r   r     r   r>   r   c                        e Zd Z fdZ	 	 	 d	dej
                  deej                     deej                     dee   de	ej
                     f
dZ
d Z xZS )
SplinterLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y )Nr   )
r$   r%   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r9   s     r=   r%   zSplinterLayer.__init__  sI    '-'E'E$*6208$V,r>   r   r[   r]   r   rB   c                      | j                   |f|||d|}|d   }|dd  }t        | j                  | j                  | j                  |      }	|	f|z   }|S r   )r   r   feed_forward_chunkr   r   )
r:   r   r[   r]   r   rm   self_attention_outputsr   r   layer_outputs
             r=   rM   zSplinterLayer.forward  s     "0"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+r>   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   )r:   r   intermediate_outputr   s       r=   r   z SplinterLayer.feed_forward_chunk4  s,    "//0@A{{#68HIr>   r   )rN   rO   rP   r%   r5   r   r   rS   r   rT   rM   r   rU   rV   s   @r=   r   r     st    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2r>   r   c                        e Zd Z fdZe	 	 	 	 	 d
dej                  deej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd	       Z xZS )SplinterEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r$   r%   r;   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r:   r;   ir<   s      r=   r%   zSplinterEncoder.__init__<  sN    ]]5IaIaCb#caM&$9#cd
&+# $ds   A#r   r[   r]   r   output_hidden_statesreturn_dictrB   c           	          |rdnd }|rdnd }	t        | j                        D ]4  \  }
}|r||fz   }|||
   nd } |d||||d|}|d   }|s,|	|d   fz   }	6 |r||fz   }t        |||	      S )N )r   r[   r]   r   r   r   last_hidden_stater   
attentions)	enumerater   r   )r:   r   r[   r]   r   r   r   rm   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 r=   rM   zSplinterEncoder.forwardB  s     #7BD$5b4(4 	POA|#$58H$H!.7.CilO( +-)"3	
 M *!,M &9]1=M<O&O#!	P$   1]4D D++*
 	
r>   )NNFFT)rN   rO   rP   r%   r   r5   r   r   rS   r   r   rT   r   rM   rU   rV   s   @r=   r   r   ;  s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
r>   r   c                   &    e Zd ZU eed<   dZdZd Zy)SplinterPreTrainedModelr;   splinterTc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsr   )meanstdNg      ?)r   r   r}   weightdatanormal_r;   initializer_rangebiaszero_r&   r   r/   fill_)r:   rW   s     r=   _init_weightsz%SplinterPreTrainedModel._init_weightsr  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .r>   N)rN   rO   rP   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   r   r>   r=   r   r   l  s    "&*#*r>   r   c                   D    e Zd ZdZ fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
e   de
e   de
e   deeef   fd              Z xZS )SplinterModela2  
    The model is an encoder (with only self-attention) following the architecture described in [Attention is all you
    need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
    Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
    c                     t         |   |       || _        t        |      | _        t        |      | _        | j                          y r   )r$   r%   r;   r   rL   r   encoder	post_initr9   s     r=   r%   zSplinterModel.__init__  s;     ,V4&v. 	r>   c                 .    | j                   j                  S r   rL   r*   )r:   s    r=   get_input_embeddingsz"SplinterModel.get_input_embeddings  s    ...r>   c                 &    || j                   _        y r   r   )r:   rZ   s     r=   set_input_embeddingsz"SplinterModel.set_input_embeddings  s    */'r>   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   r   r   r   )r:   heads_to_pruner   r   s       r=   _prune_headszSplinterModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr>   r?   r[   r@   r   r]   rA   r   r   r   rB   c
                 "   ||n| j                   j                  }||n| j                   j                  }|	|	n| j                   j                  }	||t	        d      |#| j                  ||       |j                         }
n!||j                         dd }
nt	        d      |
\  }}||j                  n|j                  }|t        j                  ||f|      }|&t        j                  |
t        j                  |      }| j                  ||
      }| j                  || j                   j                        }| j                  ||||      }| j!                  |||||d	      }|d
   }t#        ||j$                  |j&                        S )a  
        token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        NzDYou cannot specify both input_ids and inputs_embeds at the same timer    z5You have to specify either input_ids or inputs_embeds)rF   rD   )r?   r   r@   rA   T)r[   r]   r   r   r   r   r   )r;   r   r   use_return_dictry   %warn_if_padding_and_no_attention_maskrG   rF   r5   onesrH   rI   get_extended_attention_maskget_head_maskr   rL   r   r   r   r   )r:   r?   r[   r@   r   r]   rA   r   r   r   rJ   
batch_sizerK   rF   extended_attention_maskembedding_outputencoder_outputssequence_outputs                     r=   rM   zSplinterModel.forward  s   : 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T!"ZZ*j)A6RN!"[[EJJvVN 150P0PQ_al0m &&y$++2O2OP	??%)'	 + 
 ,,2/!5 ' 
 *!,-)77&11
 	
r>   )	NNNNNNNNN)rN   rO   rP   rQ   r%   r   r   r   r   r   r   r5   r   r   r   rT   r   rM   rU   rV   s   @r=   r   r     s   /0C  -11515/3,004,0/3&*R
ELL)R
 !.R
 !.	R

 u||,R
 ELL)R
  -R
 $D>R
 'tnR
 d^R
 
uo%	&R
  R
r>   r   c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )SplinterFullyConnectedLayerc                     t         |           || _        || _        t	        j
                  | j                  | j                        | _        t        |   | _        t	        j                  | j                        | _	        y r   )
r$   r%   	input_dim
output_dimr   r}   r   r
   act_fnr/   )r:   r  r  r   r<   s       r=   r%   z$SplinterFullyConnectedLayer.__init__  sV    "$YYt~~t?
Z(doo6r>   inputsrB   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r  r/   )r:   r  r   s      r=   rM   z#SplinterFullyConnectedLayer.forward  s2    

6*M2}5r>   )gelur   rV   s   @r=   r	  r	    s#    7ell u|| r>   r	  c                   (     e Zd ZdZ fdZd Z xZS )QuestionAwareSpanSelectionHeadzf
    Implementation of Question-Aware Span Selection (QASS) head, described in Splinter's paper:

    c                    t         |           t        |j                  |j                        | _        t        |j                  |j                        | _        t        |j                  |j                        | _        t        |j                  |j                        | _        t        j                  |j                  |j                  d      | _
        t        j                  |j                  |j                  d      | _        y )NF)r   )r$   r%   r	  r(   query_start_transformquery_end_transformstart_transformend_transformr   r}   start_classifierend_classifierr9   s     r=   r%   z'QuestionAwareSpanSelectionHead.__init__  s    %@ASASU[UgUg%h"#>v?Q?QSYSeSe#f :6;M;MvOaOab89K9KVM_M_` "		&*<*<f>P>PW\ ] ii(:(:F<N<NUZ[r>   c                    |j                         \  }}}|j                  d      j                  dd|      }t        j                  |d|      }| j                  |      }| j                  |      }| j                  |      }	| j                  |      }
| j                  |      }|	j                  ddd      }	t        j                  ||	      }| j                  |      }|
j                  ddd      }
t        j                  ||
      }||fS )Nr    r   )ra   r   r   r_   )rG   	unsqueezerepeatr5   gatherr  r  r  r  r  permuterd   r  )r:   r  	positions_ra   r   gathered_repsquery_start_repsquery_end_reps
start_repsend_repsr   start_logits
end_logitss                 r=   rM   z&QuestionAwareSpanSelectionHead.forward  s    KKM	1c##B'..q!S9V%@55mD11-@))&1
%%f---.>?''1a0
||M:>++N;##Aq!,\\-:
Z''r>   )rN   rO   rP   rQ   r%   rM   rU   rV   s   @r=   r  r    s    
	\(r>   r  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   deej                     de
eef   fd       Z xZS )SplinterForQuestionAnsweringc                     t         |   |       t        |      | _        t	        |      | _        |j                  | _        | j                          y r   r$   r%   r   r   r  splinter_qassquestion_token_idr   r9   s     r=   r%   z%SplinterForQuestionAnswering.__init__4  C     %f-;FC!'!9!9 	r>   r?   r[   r@   r   r]   rA   start_positionsend_positionsr   r   r   question_positionsrB   c                    ||n| j                   j                  }d}||Dt        j                  t        j                  || j
                        j                         d      }nJt        j                  |j                  d      t        j                  |j                  |j                        }|j                  d      }d}| j                  |||||||	|
|	      }|d   }| j                  ||      \  }}|r"|j                  d	      |j                  d	      }}|d|d	|z
  t        j                   |j"                        j$                  z  z   }|d	|z
  t        j                   |j"                        j$                  z  z   }d}||t'        |j                               d	kD  r|j                  d      }t'        |j                               d	kD  r|j                  d      }|j                  d	      }|j)                  d|       |j)                  d|       t+        |
      } |||      } |||      }||z   dz  }|s||f|d	d z   }||f|z   S |S t-        ||||j.                  |j0                        S )a  
        token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
            num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
            the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
            sequence_length)`.
        NFr    r   r   )rE   layoutrF   Tr[   r@   r   r]   rA   r   r   r   r   ignore_indexr_   lossr&  r'  r   r   )r;   r   r5   argmaxeqr-  rz   rH   rG   rI   r3  rF   r  r   r,  squeezefinforE   minr   clamp_r   r   r   r   )r:   r?   r[   r@   r   r]   rA   r/  r0  r   r   r   r1  question_positions_were_none"question_position_for_each_exampler   r  r&  r'  
total_lossignored_indexloss_fct
start_lossend_lossr   s                            r=   rM   z$SplinterForQuestionAnswering.forward>  s   H &1%<k$++B]B]',$%$5:\\XXi)?)?@EEGR62 6;[[!&&q)MDXDXanauau62 "D!M!Mb!Q+/(--))%'/!5#   

 "!*#'#5#5oGY#Z j''3';';A'>
@R@RST@U*L%'1~+=\M_M_A`AdAd*ddL#q>'9U[[IYIY=Z=^=^&^^J
&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r>   NNNNNNNNNNNN)rN   rO   rP   r%   r   r   r5   r   rR   r   r   rT   r   rM   rU   rV   s   @r=   r)  r)  2  s?     -11515/3,0046:48,0/3&*9=c
ELL)c
 !.c
 !.	c

 u||,c
 ELL)c
  -c
 "%"2"23c
   0 01c
 $D>c
 'tnc
 d^c
 %U%5%56c
 
u22	3c
 c
r>   r)  zB
    Class for outputs of Splinter as a span selection model.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)SplinterForPreTrainingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when start and end positions are provided):
        Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
    start_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
        Span-start scores (before SoftMax).
    end_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
        Span-end scores (before SoftMax).
    Nr8  r&  r'  r   r   )rN   rO   rP   rQ   r8  r   r5   rS   r   r&  r'  r   rT   r   r   r>   r=   rI  rI    s|     )-D(5$$
%,04L(5,,-4.2J**+28<M8E%"3"345<59Ju00129r>   rI  z
    Splinter Model for the recurring span selection task as done during the pretraining. The difference to the QA task
    is that we do not have a question, but multiple question tokens that replace the occurrences of recurring spans
    instead.
    c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   deej                     de
eef   fd       Zdej                  dej                  fdZ xZS )SplinterForPreTrainingc                     t         |   |       t        |      | _        t	        |      | _        |j                  | _        | j                          y r   r+  r9   s     r=   r%   zSplinterForPreTraining.__init__  r.  r>   r?   r[   r@   r   r]   rA   r/  r0  r   r   r   r1  rB   c                 b   ||n| j                   j                  }|||t        d      ||t        d      || j                  |      }| j	                  |||||||	|
|	      }|d   }|j                         \  }}}| j                  ||      \  }}|j                  d      }||j                  d      j                  |||      }|d|z
  t        j                  |j                        j                  z  z   }|d|z
  t        j                  |j                        j                  z  z   }d}|||j                  dt        d|dz
               |j                  dt        d|dz
               t        | j                   j                         } ||j#                  ||z  |      |j#                  ||z              } ||j#                  ||z  |      |j#                  ||z              }||z   dz  }|s||f|dd z   }||f|z   S |S t%        ||||j&                  |j(                  	      S )
a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_questions, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        start_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
            The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
            num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
            the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
            sequence_length)`.
        NzCquestion_positions must be specified in order to calculate the lossz>question_positions must be specified when input_embeds is usedr4  r   r   r5  r_   r7  )r;   r   	TypeError_prepare_question_positionsr   rG   r,  r  r7   r5   r<  rE   r=  r>  maxr   r)   rk   rI  r   r   )r:   r?   r[   r@   r   r]   rA   r/  r0  r   r   r   r1  r   r  r  sequence_lengthra   r&  r'  num_questions attention_mask_for_each_questionrA  rC  rD  rE  r   s                              r=   rM   zSplinterForPreTraining.forward  s   n &1%<k$++B]B]%/*E-Jcabb'I,=\]]'!%!A!A)!L--))%'/!5#   

 "!*+:+?+?+A(
OS#'#5#5oGY#Z j*//2%/=/G/G/J/Q/QM?0, (1/O+OSXS^S^_k_q_qSrSvSv*vvL#q+K'Ku{{[e[k[kOlOpOp&ppJ
&=+D""1c!_q-@&AB  C?Q+>$?@ (T[[5M5MNH!!!*}"<oN$$Z-%?@J  
] :OL"":#=>H %x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r>   c                    t        j                  || j                  j                  k(        \  }}t        j                  |      }t        j
                  |j                  d      |j                         f| j                  j                  t         j                  |j                        }t        j                  |D cg c]  }t        j                  |       c}      }||||f<   |S c c}w )Nr   rD   )r5   wherer;   r-  bincountfullrG   rP  r)   rI   rF   catr6   )r:   r?   rowsflat_positionsrR  r  ncolss           r=   rO  z2SplinterForPreTraining._prepare_question_positionsK  s    ${{98U8U+UVnt,JJ^^A 1 1 34KK$$**##	
	 yy=Aa%,,q/AB .	$* Bs   <C(rF  )rN   rO   rP   r%   r   r   r5   r   rR   r   r   rT   rI  rM   rO  rU   rV   s   @r=   rK  rK    s[     -11515/3,0046:48,0/3&*9=z
ELL)z
 !.z
 !.	z

 u||,z
 ELL)z
  -z
 "%"2"23z
   0 01z
 $D>z
 'tnz
 d^z
 %U%5%56z
 
u22	3z
 z
xU\\ ell r>   rK  )r)  rK  r   r   r   )r   N)8rQ   dataclassesr   typingr   r   r   r5   torch.utils.checkpointr   torch.nnr   activationsr
   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_splinterr   
get_loggerrN   loggerModuler   r   floatrq   rs   r   r   r   r   r   r   r   r   r	  r  r)  rI  rK  __all__r   r>   r=   <module>rm     s.    ! , ,    % ! 9 Z Z F l l 
 3 
		H	%2 2z (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v *		 *\299  RYY %. %R.
bii .
b *o * *, s
+ s
 s
l")) $#(RYY #(L o
#: o
 o
d 
:; : :" S4 SSlr>   