
    rhb                        d Z ddlZddlmZ ddlmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZmZmZ ddlmZmZmZmZ ddlm Z   ejB                  e"      Z#e ed       G d de                    Z$ G d de	jJ                        Z& G d de	jJ                        Z' G d de	jJ                        Z( G d de	jJ                        Z) G d de	jJ                        Z* G d de	jJ                        Z+ G d  d!e	jJ                        Z, G d" d#e	jJ                        Z- G d$ d%e	jJ                        Z. G d& d'e      Z/ G d( d)e	jJ                        Z0 G d* d+e	jJ                        Z1 G d, d-e	jJ                        Z2e G d. d/e             Z3e G d0 d1e3             Z4e G d2 d3e3             Z5 ed4       G d5 d6e3             Z6 ed7       G d8 d9e3             Z7g d:Z8y);zPyTorch Bros model.    N)	dataclass)OptionalUnion)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)ModelOutputauto_docstringcan_return_tuplelogging   )
BrosConfigz@
    Base class for outputs of token classification models.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)BrosSpadeOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Classification loss.
    initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
        Classification scores for entity initial tokens (before SoftMax).
    subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
        Classification scores for entity sequence tokens (before SoftMax).
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   tupler        y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/bros/modeling_bros.pyr   r   *   s~     )-D(5$$
%,8<(5#4#45<;?Xe&7&78?8<M8E%"3"345<59Ju00129r)   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding1Dc                     t         |           |j                  | _        ddt        j                  d| j                  d      | j                  z  z  z  }| j                  d|       y )Nr   i'          g       @inv_freq)super__init__dim_bbox_sinusoid_emb_1dr$   arangeregister_buffer)selfconfigr/   	__class__s      r*   r1   z"BrosPositionalEmbedding1D.__init__D   s^    (.(G(G%ell3(E(EsKdNkNkkl
 	Z2r)   pos_seqreturnc                    |j                         }|\  }}}|j                  |||d      | j                  j                  ddd| j                  dz        z  }t	        j
                  |j                         |j                         gd      }|S )Nr      dim)sizeviewr/   r2   r$   catsincos)r5   r8   seq_sizeb1b2b3sinusoid_inppos_embs           r*   forwardz!BrosPositionalEmbedding1D.forwardN   s    <<>
B||BB2T]]5G5G1aQUQnQnrsQs5tt))\--/1A1A1CD"Mr)   r    r!   r"   r1   r$   TensorrJ   __classcell__r7   s   @r*   r,   r,   A   s#    3u||  r)   r,   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding2Dc                     t         |           |j                  | _        t        |      | _        t        |      | _        y N)r0   r1   dim_bboxr,   	x_pos_emb	y_pos_embr5   r6   r7   s     r*   r1   z"BrosPositionalEmbedding2D.__init__W   s1    26:26:r)   bboxr9   c                    g }t        | j                        D ]U  }|dz  dk(  r&|j                  | j                  |d|f                1|j                  | j	                  |d|f                W t        j                  |d      }|S )Nr;   r   .r<   r=   )rangerS   appendrT   rU   r$   rA   )r5   rW   stackibbox_pos_embs        r*   rJ   z!BrosPositionalEmbedding2D.forward^   s|    t}}% 	;A1uzT^^DaL9:T^^DaL9:		;
 yyB/r)   rK   rN   s   @r*   rP   rP   V   s#    ;ELL U\\ r)   rP   c                   >     e Zd Z fdZdej
                  fdZ xZS )BrosBboxEmbeddingsc                     t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        y )NF)bias)	r0   r1   rP   bbox_sinusoid_embr   Lineardim_bbox_sinusoid_emb_2ddim_bbox_projectionbbox_projectionrV   s     r*   r1   zBrosBboxEmbeddings.__init__j   s=    !:6!B!yy)H)H&JdJdkpqr)   rW   c                     |j                  dd      }|d d d d d d d f   |d d d d d d d f   z
  }| j                  |      }| j                  |      }|S )Nr   r   )	transposerb   rf   )r5   rW   bbox_tbbox_posr]   s        r*   rJ   zBrosBboxEmbeddings.forwardo   s\    1%$1a-(6!T1a-+@@--h7++L9r)   rK   rN   s   @r*   r_   r_   i   s    r
ELL r)   r_   c                        e Zd ZdZ fdZ	 	 	 	 d	deej                     deej                     deej                     deej                     dej                  f
dZ xZ	S )
BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 d   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d             | j'                  dt)        j.                  | j0                  j3                         t(        j4                  | j0                  j6                        d	
       y )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   r<   token_type_idsdtypedeviceF)
persistent)r0   r1   r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrrq   r4   r$   r3   expandzerosrs   r?   longrw   rV   s     r*   r1   zBrosTextEmbeddings.__init__{   s8   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$^U\\&:X:X-Y-`-`ah-ijKK!!&&(jj((//
  	 	
r)   	input_idsrt   rs   inputs_embedsr9   c                 T   ||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|st        | d      r-| j                  d d d |f   }|j	                  |d   |      }|}n:t        j                  |t
        j                  | j                  j                        }|| j                  |      }| j                  |      }	||	z   }
| j                  dk(  r| j                  |      }|
|z  }
| j                  |
      }
| j                  |
      }
|
S )Nr<   r   rt   r   ru   rr   )r?   rs   hasattrrt   r   r$   r   r   rw   r}   r   rq   r   r   r   )r5   r   rt   rs   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr   s               r*   rJ   zBrosTextEmbeddings.forward   s=     #..*K',,.s3K ^
,,Q^<L!t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r)   )NNNN)
r    r!   r"   r#   r1   r   r$   rL   rJ   rM   rN   s   @r*   rl   rl   x   sv    Q
4 -115/304#ELL)# !.# u||,	#
  -# 
#r)   rl   c                       e Zd Z fdZ	 	 	 	 	 ddej
                  dej
                  deej
                     deej
                     deej
                     deej
                     deej
                     d	eej
                     fd
Z xZ	S )BrosSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rq   rr   relative_keyrelative_key_queryr;   r   )r0   r1   r{   num_attention_headsr   
ValueErrorintattention_head_sizeall_head_sizer   rc   querykeyvaluer   attention_probs_dropout_probr   r   rq   r~   ry   distance_embedding
is_decoderrV   s     r*   r1   zBrosSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'.v7PR\']$''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++r)   r   r]   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskoutput_attentionsr9   c                    |j                   d   d| j                  | j                  f}| j                  |      j	                  |      j                  dd      }	|d u}
|
rc| j                  |      j	                  |      j                  dd      }| j                  |      j	                  |      j                  dd      }|}n`| j                  |      j	                  |      j                  dd      }| j                  |      j	                  |      j                  dd      }t        j                  |	|j                  dd            }| j                  dk(  s| j                  dk(  rF|j                         d   }t        j                  |t        j                  |j                        j	                  dd      }t        j                  |t        j                  |j                        j	                  dd      }||z
  }| j                  || j                   z   dz
        }|j#                  |	j$                  	      }| j                  dk(  rt        j&                  d
|	|      }||z   }nE| j                  dk(  r6t        j&                  d
|	|      }t        j&                  d||      }||z   |z   }|	j                   \  }}}}|j	                  ||||      }|j)                  g d      }t        j&                  d|	|f      }||z   }|t+        j,                  | j                        z  }|||z   } t/        j0                  d      |      }| j3                  |      }|||z  }t        j                  ||      }|j)                  dddd      j5                         }|j                         d d | j6                  fz   } |j                  | }|r||fn|f}| j8                  r|dz   }|S )Nr   r<   r   r;   r   r   ru   )rv   zbhld,lrd->bhlrzbhrd,lrd->bhlr)r;   r   r   r   zbnid,bijd->bnijr=   r   rR   )shaper   r   r   r@   rh   r   r   r$   matmulrq   r?   r3   r   rw   r   r~   torv   einsumpermutemathsqrtr   Softmaxr   
contiguousr   r   )r5   r   r]   r   r   r   r   r   hidden_shapequery_layeris_cross_attention	key_layervalue_layerattention_scoresr   position_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_key
batch_sizen_headd_headbbox_pos_scoresattention_probscontext_layernew_context_layer_shapeoutputss                                 r*   rJ   zBrosSelfAttention.forward   s    &++A.D4L4LdNfNfgjj/44\BLLQPQR
 3$>!67<<\JTTUVXYZI**%:;@@NXXYZ\]^K3N/44\BLLQPQRI**]388FPPQRTUVK !<<Y5H5HR5PQ''>9T=Y=Y]q=q&++-a0J"\\*EJJ}OcOcdiijlnopN"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s  2=1B1B.
FJ#((ZVT#++L9,,'8;:UV+o=+dii8P8P.QQ%/.@ -"**,-=> ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CD6G=/2mM]??'Gr)   NNNNF)
r    r!   r"   r1   r$   rL   r   r'   rJ   rM   rN   s   @r*   r   r      s    ,8 26,08<9=49P||P llP !.	P
 ELL)P  (5P !) 6P $ELL1P 
u||	Pr)   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BrosSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nro   )r0   r1   r   rc   r{   denser   r   r   r   r   rV   s     r*   r1   zBrosSelfOutput.__init__'  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r)   r   input_tensorr9   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rR   r   r   r   r5   r   r   s      r*   rJ   zBrosSelfOutput.forward-  7    

=1]3}|'CDr)   rK   rN   s   @r*   r   r   &  1    >U\\  RWR^R^ r)   r   c                       e Zd Z fdZd Z	 	 	 	 	 ddej                  dej                  deej                     deej                     deej                     deej                     d	ee   d
e	ej                     fdZ
 xZS )BrosAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y rR   )r0   r1   r   r5   r   outputsetpruned_headsrV   s     r*   r1   zBrosAttention.__init__5  s0    %f-	$V,Er)   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r=   )lenr   r5   r   r   r   r   r   r   r   r   r   r   union)r5   headsindexs      r*   prune_headszBrosAttention.prune_heads;  s   u:?7II))II))	
u -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r)   r   r]   r   r   r   r   r   r9   c           	      r    | j                  |||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )Nr   r]   r   r   r   r   r   r   r   )r5   r   )r5   r   r]   r   r   r   r   r   self_outputsattention_outputr   s              r*   rJ   zBrosAttention.forwardP  s\     yy'%)"7#9/ ! 
  ;;|AF#%QR(88r)   r   )r    r!   r"   r1   r   r$   rL   r   boolr'   rJ   rM   rN   s   @r*   r   r   4  s    ";2 26,08<9=,1|| ll !.	
 ELL)  (5 !) 6 $D> 
u||	r)   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rR   )r0   r1   r   rc   r{   intermediate_sizer   
isinstance
hidden_actstrr	   intermediate_act_fnrV   s     r*   r1   zBrosIntermediate.__init__j  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r)   r   r9   c                 J    | j                  |      }| j                  |      }|S rR   )r   r   )r5   r   s     r*   rJ   zBrosIntermediate.forwardr  s&    

=100?r)   rK   rN   s   @r*   r   r   i  s#    9U\\ ell r)   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
BrosOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r0   r1   r   rc   r   r{   r   r   r   r   r   r   rV   s     r*   r1   zBrosOutput.__init__y  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r)   r   r   r9   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rR   r   r   s      r*   rJ   zBrosOutput.forward  r   r)   rK   rN   s   @r*   r   r   x  r   r)   r   c                       e Zd Z fdZ	 	 	 	 	 ddej
                  dej
                  deej                     deej                     deej                     deej                     dee   d	e	ej
                     fd
Z
d Z xZS )	BrosLayerc                 b   t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r*| j                  st        |  d      t	        |      | _	        t        |      | _        t        |      | _        y )Nr   z> should be used as a decoder model if cross attention is added)r0   r1   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rV   s     r*   r1   zBrosLayer.__init__  s    '-'E'E$&v. ++#)#=#= ##??4&(f ghh"/"7D,V4 (r)   r   r]   r   r   r   r   r   r9   c                    | j                  |||||      }|d   }	| j                  r|dd }
n|dd  }
| j                  rA|?t        | d      rt        d|  d      | j	                  |	|||||      }|d   }	|
|dd z   }
t        | j                  | j                  | j                  |	      }|f|
z   }
| j                  r|
d	z   }
|
S )
N)r]   r   r   r   r   r   r<   r   z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`)r   r   r   r   r   rR   )	r   r   r   r   r   r   feed_forward_chunkr   r   )r5   r   r]   r   r   r   r   r   self_attention_outputsr   r   cross_attention_outputslayer_outputs                r*   rJ   zBrosLayer.forward  s%    "&%)/ "0 "
 2!4 ??,Qr2G,QR0G??4@t-.=dV  Dd  e  '+&9&9 -#&;'="3 ': '#  7q9 7" ==G0##((	
  /G+ ??'Gr)   c                 L    | j                  |      }| j                  ||      }|S rR   )r   r   )r5   r   intermediate_outputr  s       r*   r   zBrosLayer.feed_forward_chunk  s,    "//0@A{{#68HIr)   r   )r    r!   r"   r1   r$   rL   r   r%   r   r'   rJ   r   rM   rN   s   @r*   r   r     s    )$ 7;15=A>B,16||6 ll6 !!2!23	6
 E--.6  ((9(9:6 !)):): ;6 $D>6 
u||	6pr)   r   c                   .    e Zd Z fdZe	 	 	 	 	 	 	 ddej                  dej                  deej                     deej                     deej                     deej                     dee	   d	ee	   d
ee	   de
eej                     ef   fd       Z xZS )BrosEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        y c c}w rR   )	r0   r1   r6   r   
ModuleListrY   num_hidden_layersr   layer)r5   r6   _r7   s      r*   r1   zBrosEncoder.__init__  sC    ]]uVE]E]?^#_!If$5#_`
#_s   Ar   r]   r   r   r   r   r   output_hidden_statesreturn_dictr9   c
           
      \   |rdnd }
|rdnd }|r| j                   j                  rdnd }t        | j                        D ]V  \  }}|r|
|fz   }
|||   nd } ||||||||      }|d   }|s.||d   fz   }| j                   j                  sN||d   fz   }X |r|
|fz   }
t	        ||
||      S )Nr(   r   r   r   r;   )last_hidden_stater   r   cross_attentions)r6   r   	enumerater  r   )r5   r   r]   r   r   r   r   r   r  r  all_hidden_statesall_self_attentionsall_cross_attentionsr\   layer_modulelayer_head_masklayer_outputss                    r*   rJ   zBrosEncoder.forward  s     #7BD$5b4%64;;;Z;Zr`d(4 	VOA|#$58H$H!.7.CilO(+)-)&;'="3M *!,M &9]1=M<O&O#;;22+?=QRCSBU+U()	V,   1]4D D1++*1	
 	
r)   )NNNNFFT)r    r!   r"   r1   r   r$   rL   r   r%   r   r   r'   r   rJ   rM   rN   s   @r*   r  r    s    a
 
 7;15=A>B,1/4&*.
||.
 ll.
 !!2!23	.

 E--..
  ((9(9:.
 !)):): ;.
 $D>.
 'tn.
 d^.
 
uU\\"$FF	G.
 .
r)   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
BrosPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rR   )r0   r1   r   rc   r{   r   Tanh
activationrV   s     r*   r1   zBrosPooler.__init__  s9    YYv1163E3EF
'')r)   r   r9   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r  )r5   r   first_token_tensorpooled_outputs       r*   rJ   zBrosPooler.forward  s6     +1a40

#566r)   rK   rN   s   @r*   r  r    s#    $
U\\ ell r)   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosRelationExtractorc                 R   t         |           |j                  | _        |j                  | _        |j                  | _        |j                  | _        t        j                  | j                        | _	        t        j                  | j                  | j                  | j
                  z        | _        t        j                  | j                  | j                  | j
                  z        | _        t        j                  t        j                  d| j                              | _        y )Nr   )r0   r1   n_relationsr{   backbone_hidden_sizehead_hidden_sizeclassifier_dropout_probr   r   droprc   r   r   	Parameterr$   r   
dummy_noderV   s     r*   r1   zBrosRelationExtractor.__init__  s    !--$*$6$6! & 2 2'-'E'E$JJt;;<	YYt88$:J:JTMbMb:bc
99T668H8H4K`K`8`a,,u{{1d6O6O'PQr)   r   r   c           	         | j                  | j                  |            }| j                  j                  d      j	                  d|j                  d      d      }t        j                  ||gd      }| j                  | j                  |            }|j                  |j                  d      |j                  d      | j                  | j                        }|j                  |j                  d      |j                  d      | j                  | j                        }t        j                  |j                  dddd      |j                  dddd            }|S )Nr   r   axisr;   r   )r   r(  r*  	unsqueezerepeatr?   r$   rA   r   r@   r$  r&  r   r   )r5   r   r   	dummy_vecrelation_scores        r*   rJ   zBrosRelationExtractor.forward*  s   jj;!78OO--a0779>>!;LaP	IIy)41=	HHTYYy12	!&&Q!1!1!!4d6F6FH]H]
 NN9>>!#4innQ6GIYIY[_[p[pq	1a+Y->->q!Q-J
 r)   rK   rN   s   @r*   r"  r"    s$    R5<< ELL r)   r"  c                   <    e Zd ZU eed<   dZdej                  fdZy)BrosPreTrainedModelr6   brosmodulec                 V   | j                   j                  }t        |t        j                        rY|j
                  j                  j                  d|       |j                  %|j                  j                  j                          yyt        |t        j                        rf|j
                  j                  j                  d|       |j                  2|j
                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j
                  j                  j                  d       yt        |t              r,t        j                  j                  |j                   |       yy)zInitialize the weightsr.   )meanstdNg      ?)r8  )r6   initializer_ranger   r   rc   weightdatanormal_ra   zero_ry   rn   r   fill_r"  initr*  )r5   r5  r8  s      r*   _init_weightsz!BrosPreTrainedModel._init_weightsB  s&   kk++fbii( MM&&CS&9{{&  &&( '-MM&&CS&9!!-""6#5#56<<> .-KK""$MM$$S) 56GGOOF--3O7 7r)   N)	r    r!   r"   r   r&   base_model_prefixr   Moduler@  r(   r)   r*   r3  r3  =  s    8BII 8r)   r3  c                       e Zd Zd fd	Zd Zd Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     de	e
j                     d	e	e
j                     d
e	e
j                     de	e
j                     de	e
j                     de	e
j                     de	e   de	e   de	e   deee
j                     ef   fd              Z xZS )	BrosModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        |rt        |      nd| _
        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r0   r1   r6   rl   r   r_   bbox_embeddingsr  encoderr  poolerinit_weights)r5   r6   add_pooling_layerr7   s      r*   r1   zBrosModel.__init__X  sZ    
 	 ,V41&9"6*,=j(4r)   c                 .    | j                   j                  S rR   r   r}   )r5   s    r*   get_input_embeddingszBrosModel.get_input_embeddingsh  s    ...r)   c                 &    || j                   _        y rR   rL  )r5   r   s     r*   set_input_embeddingszBrosModel.set_input_embeddingsk  s    */'r)   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrG  r  r   r   )r5   heads_to_pruner  r   s       r*   _prune_headszBrosModel._prune_headsn  sE    
 +002 	CLE5LLu%//;;EB	Cr)   r   rW   r   rt   rs   r   r   r   r   r   r  r  r9   c                 T   |
|
n| j                   j                  }
||n| j                   j                  }||n| j                   j                  }||t	        d      ||j                         }n!||j                         dd }nt	        d      |t	        d      |\  }}||j                  n|j                  }|t        j                  ||      }|pt        | j                  d      r4| j                  j                  ddd|f   }|j                  ||      }|}n&t        j                  |t        j                  |      }| j                  |||      }| j                   j                   rE|C|j                         \  }}}||f}|	t        j                  ||      }	| j#                  |	      }nd}| j%                  || j                   j&                        }| j                  ||||	      }|j(                  d   d
k(  r|ddddg df   }|| j                   j*                  z  }| j-                  |      }| j/                  |||||||
|d	      }|d   }| j0                  | j1                  |      nd}t3        |||j4                  |j6                  |j8                        S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embedszYou have to specify bbox)rw   rt   ru   )r   rs   rt   r      )r   r   r;   r   r;   r   r   r   T)r]   r   r   r   r   r   r  r  r   )r  pooler_outputr   r   r  )r6   r   r  use_return_dictr   r?   rw   r$   onesr   r   rt   r   r   r   get_extended_attention_maskr   invert_attention_maskget_head_maskr
  r   
bbox_scalerF  rG  rH  r   r   r   r  )r5   r   rW   r   rt   rs   r   r   r   r   r   r  r  r   r   r   rw   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputscaled_bboxbbox_position_embeddingsencoder_outputssequence_outputr   s                                  r*   rJ   zBrosModel.forwardv  s   P 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"#..*K&',,.s3KTUU<788!,
J%.%:!!@T@T!"ZZFCN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_alnt0u ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y$++2O2OP	??%)'	 + 
 ::b>Q1667DT[[333#'#7#7#D ,,12"7#B/!5 ' 

 *!,8<8OO4UY;-')77&11,==
 	
r)   )TNNNNNNNNNNNN)r    r!   r"   r1   rM  rO  rS  r   r   r   r$   rL   r   r   r'   r   rJ   rM   rN   s   @r*   rD  rD  V  sZ    /0C  -1'+1515/3,0048<9=,0/3&*}
ELL)}
 u||$}
 !.	}

 !.}
 u||,}
 ELL)}
  -}
  (5}
 !) 6}
 $D>}
 'tn}
 d^}
 
uU\\"$PP	Q}
  }
r)   rD  c                       e Zd ZdgZ fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee
   dee
   dee
   deeej                     ef   fd              Z xZS )BrosForTokenClassificationrH  c                 `   t         |   |       |j                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y Nclassifier_dropout)r0   r1   
num_labelsrD  r4  r   rl  r   r   r   r   rc   r{   
classifierrI  r5   r6   rl  r7   s      r*   r1   z#BrosForTokenClassification.__init__  s      ++f%	)09M)NF%%TZTnTn 	 zz"45))F$6$68I8IJr)   r   rW   r   bbox_first_token_maskrt   rs   r   r   labelsr   r  r  r9   c                    ||n| j                   j                  }| j                  ||||||||
|d
      }|d   }| j                  |      }| j	                  |      }d}|	t               }|J|j                  d      } ||j                  d| j                        |   |	j                  d      |         }n2 ||j                  d| j                        |	j                  d            }t        |||j                  |j                        S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NT)	rW   r   rt   rs   r   r   r   r  r  r   r<   r   logitsr   r   )r6   rW  r4  r   rn  r   r@   rm  r   r   r   )r5   r   rW   r   rp  rt   rs   r   r   rq  r   r  r  r   rf  rt  r   loss_fcts                     r*   rJ   z"BrosForTokenClassification.forward	  s   Z &1%<k$++B]B]))))%'/!5  
 "!*,,71')H$0(=(B(B2(F%KKDOO45JKV[[Y[_]rMs  B @&++b/R$!//))	
 	
r)   rg  r    r!   r"   "_keys_to_ignore_on_load_unexpectedr1   r   r   r   r$   rL   r   r   r'   r   rJ   rM   rN   s   @r*   ri  ri    sR   *3&  -1'+158<15/3,004)-,0/3&*O
ELL)O
 u||$O
 !.	O

  (5O
 !.O
 u||,O
 ELL)O
  -O
 &O
 $D>O
 'tnO
 d^O
 
uU\\"$99	:O
  O
r)   ri  a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    c            !           e Zd ZdgZ fdZee	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     dee
   dee
   dee
   deeej                     ef   fd              Z xZS )!BrosSpadeEEForTokenClassificationrH  c           	      f   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  t        j                  |      t        j                  |j
                  |j
                        t        j                  |      t        j                  |j
                  |j                              | _        t#        |      | _        | j'                          y rk  )r0   r1   r6   rm  r$  r{   r%  rD  r4  r   rl  r   r   
Sequentialr   rc   initial_token_classifierr"  subsequent_token_classifierrI  ro  s      r*   r1   z*BrosSpadeEEForTokenClassification.__init__i  s      ++!--$*$6$6!f%	)09M)NF%%TZTnTn 	
 )+JJ)*IIf((&*<*<=JJ)*IIf((&*;*;<	)
% ,A+H(r)   r   rW   r   rp  rt   rs   r   r   initial_token_labelssubsequent_token_labelsr   r  r  r9   c                    ||n| j                   j                  }| j                  |||||||||d
      }|d   }|j                  dd      j	                         }| j                  |      j                  dd      j	                         }| j                  ||      j                  d      }d|z
  }|j                  \  }}|j                  }t        j                  |t        j                  |dg      j                  |      gd      j                         }|j                  |dddddf   t        j                   |j"                        j$                        }t        j&                  ||dz         j                  |t        j                        }|j                  |dddddf   t        j                   |j"                        j$                        }|j)                  d      j                         }d}|	|
t+               }|	j)                  d      }	|;|j)                  d      } ||j)                  d| j,                        |   |	|         }n# ||j)                  d| j,                        |	      }|
j)                  d      }
 ||j)                  d|dz         |   |
|         }||z   }t/        ||||j0                  |j2                  	      S )
a>  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.
        initial_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the initial token classification.
        subsequent_token_labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for the subsequent token classification.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NT
r   rW   r   rt   rs   r   r   r   r  r  r   r   r,  rw   rv   r<   )r   r   r   r   r   )r6   rW  r4  rh   r   r|  r}  squeezer   rw   r$   rA   r   r   r   masked_fillfinforv   mineyer@   r   rm  r   r   r   )r5   r   rW   r   rp  rt   rs   r   r   r~  r  r   r  r  r   last_hidden_statesr   r   inv_attention_maskr   max_seq_lengthrw   invalid_token_maskself_token_masksubsequent_token_maskr   ru  initial_token_losssubsequent_token_losss                                r*   rJ   z)BrosSpadeEEForTokenClassification.forward  s   d &1%<k$++B]B]))))%'/!5  
 %QZ/99!Q?JJL#<<=OPZZ[\^_`kkm"&"B"BCUWi"j"r"rst"u /%7%=%="
N#**"YY(:EKKUV<X<[<[\b<c'dklmrrt"9"E"Eq$z*EKK8O8U8U,V,Z,Z#
  ))NNQ4FGJJRX`e`j`jJk"9"E"ED!QJ'5L5R5R)S)W)W#
 !/ 3 3B 7 < < >+0G0S')H $8#<#<R#@ $0(=(B(B2(F%%-(--b$//BCXY()>?&"
 &..B.G.GDOO.\^r%s"&=&B&B2&F#$,',,R!1CDEZ['(=>%!
 &(==D!5$;!//))
 	
r)   )NNNNNNNNNNNNN)r    r!   r"   rw  r1   r   r   r   r$   rL   r   r   r'   r   rJ   rM   rN   s   @r*   ry  ry  ]  sj    +4&2  -1'+158<15/3,0047;:>,0/3&*o
ELL)o
 u||$o
 !.	o

  (5o
 !.o
 u||,o
 ELL)o
  -o
 'u||4o
 "*%,,!7o
 $D>o
 'tno
 d^o
 
uU\\"O3	4o
  o
r)   ry  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       e Zd ZdgZ fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee
   dee
   dee
   deeej                     ef   fd              Z xZS )!BrosSpadeELForTokenClassificationrH  c                 @   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                   t        |      | _        | j                          y rk  )r0   r1   r6   rm  r$  r{   r%  rD  r4  r   rl  r   r"  entity_linkerrI  rV   s     r*   r1   z*BrosSpadeELForTokenClassification.__init__  s      ++!--$*$6$6!f%	&-f6J&K	"	"QWQkQk26:r)   r   rW   r   rp  rt   rs   r   r   rq  r   r  r  r9   c                    ||n| j                   j                  }| j                  ||||||||
|d
      }|d   }|j                  dd      j	                         }| j                  ||      j                  d      }d}|	et               }|j                  \  }}|j                  }t        j                  ||dz         j                  |t        j                        }|j                  d      }t        j                  | t        j                   |dgt        j                  |      gd	      }|j#                  |dddddf   t        j$                  |j&                        j(                        }|j#                  |dddddf   t        j$                  |j&                        j(                        } ||j                  d|dz         |   |	j                  d      |         }t+        |||j,                  |j.                  
      S )a  
        bbox ('torch.FloatTensor' of shape '(batch_size, num_boxes, 4)'):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.
        bbox_first_token_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```NTr  r   r   r  r<   ru   r,  rs  )r6   rW  r4  rh   r   r  r  r   r   rw   r$   r  r   r   r@   rA   r   r  r  rv   r  r   r   r   )r5   r   rW   r   rp  rt   rs   r   r   rq  r   r  r  r   r  rt  r   ru  r   r  rw   r  masks                          r*   rJ   z)BrosSpadeELForTokenClassification.forward  s   X &1%<k$++B]B]))))%'/!5  
 %QZ/99!Q?JJL##$68JKSSTUV')H)7)=)=&J#**F#ii8JKNNV\didndnNoO(--b1D$)II**KKQuzz&Q %! ''(=aqj(I5;;W]WcWcKdKhKhiF''a
(CU[[QWQ]Q]E^EbEbcFFKKNQ,>?Ev{{SUW[G\]D$!//))	
 	
r)   rg  rv  rN   s   @r*   r  r    sT    +4&  -1'+158<15/3,004)-,0/3&*Y
ELL)Y
 u||$Y
 !.	Y

  (5Y
 !.Y
 u||,Y
 ELL)Y
  -Y
 &Y
 $D>Y
 'tnY
 d^Y
 
uU\\"$99	:Y
  Y
r)   r  )r3  rD  ri  ry  r  )9r#   r   dataclassesr   typingr   r   r$   torch.utils.checkpointr   torch.nnr   activationsr	   modeling_layersr
   modeling_outputsr   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   configuration_brosr   
get_loggerr    loggerr   rB  r,   rP   r_   rl   r   r   r   r   r   r   r  r  r"  r3  rD  ri  ry  r  __all__r(   r)   r*   <module>r     s     ! "    % ! 9 
 . l l K K * 
		H	% 
:k : :"		 *		 & > >Bi		 iZRYY 1BII 1jryy  J* JZ5
")) 5
r BII D 8/ 8 80 ^
# ^
 ^
B a
!4 a
 a
H M
(; M
M
` l
(; l
l
^r)   