
    rh                     T   d Z ddlZddlmZmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlm Z   ejB                  e"      Z# G d dejH                        Z% G d dejH                        Z& G d dejH                        Z' G d dejH                        Z( G d dejH                        Z) G d dejH                        Z* G d dejH                        Z+ G d de      Z, G d d ejH                        Z- G d! d"ejH                        Z.e G d# d$e             Z/e G d% d&e/             Z0 ed'(       G d) d*e/             Z1e G d+ d,e/             Z2 G d- d.ejH                        Z3e G d/ d0e/             Z4g d1Z5y)2zPyTorch LiLT model.    N)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )
LiltConfigc                   :     e Zd Z fdZ	 	 	 	 ddZd Zd Z xZS )LiltTextEmbeddingsc                 8   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        | j#                  dt%        j&                  |j                        j)                  d      d       t+        |dd      | _        |j                  | _        t        j                  |j                  |j
                  | j.                        | _	        y )	Npadding_idxepsposition_ids)r   F)
persistentposition_embedding_typeabsolute)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandgetattrr#   r   selfconfig	__class__s     y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/lilt/modeling_lilt.pyr&   zLiltTextEmbeddings.__init__,   s2   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
 (/v7PR\']$ "..#%<<**F,>,>DL\L\$
     c                 D   |I|6| j                  || j                        j                  |j                        }n| j	                  |      }||j                         }n|j                         d d }|:t        j                  |t        j                  | j                  j                        }|| j                  |      }| j                  |      }||z   }| j                  dk(  r| j                  |      }||z  }| j                  |      }| j                  |      }||fS )Nr!   dtypedevicer$   )"create_position_ids_from_input_idsr   torC   &create_position_ids_from_inputs_embedssizer6   zeroslongr    r+   r/   r#   r-   r0   r4   )	r;   	input_idstoken_type_idsr    inputs_embedsinput_shaper/   
embeddingsr-   s	            r>   forwardzLiltTextEmbeddings.forwardC   s!    $#FFyRVRbRbcff$$   $JJ=Y #..*K',,.s3K!"[[EJJtO`O`OgOghN  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
<''r?   c                     |j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )a  
        Args:
        Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding
        symbols are ignored. This is modified from fairseq's `utils.make_positions`.
            x: torch.Tensor x:
        Returns: torch.Tensor
        r   dim)neintr6   cumsumtype_asrI   )r;   rJ   r   maskincremental_indicess        r>   rD   z5LiltTextEmbeddings.create_position_ids_from_input_idsg   sP     ||K(,,.$||Da8@@F$N"'')K77r?   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        Args:
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.:
            inputs_embeds: torch.Tensor
        Returns: torch.Tensor
        Nr!   r   rA   r   )rG   r6   r7   r   rI   rC   	unsqueezer8   )r;   rL   rM   sequence_lengthr    s        r>   rF   z9LiltTextEmbeddings.create_position_ids_from_inputs_embedst   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r?   )NNNN)__name__
__module____qualname__r&   rO   rD   rF   __classcell__r=   s   @r>   r   r   +   s&    
2 "(H8=r?   r   c                   &     e Zd Z fdZddZ xZS )LiltLayoutEmbeddingsc                    t         |           t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _        t        j                  |j                  |j
                  dz        | _	        |j                  | _        t        j                  |j                  |j
                  |j                  z  | j                        | _        t        j                  |j
                  |j
                  |j                  z        | _        t        j"                  |j
                  |j                  z  |j$                        | _        t        j&                  |j(                        | _        y )N   r   )in_featuresout_featuresr   )r%   r&   r   r'   max_2d_position_embeddingsr)   x_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingsr*   r   r,   channel_shrink_ratiobox_position_embeddingsLinearbox_linear_embeddingsr0   r1   r2   r3   r4   r:   s     r>   r&   zLiltLayoutEmbeddings.__init__   s^    &(\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"%'\\&2S2SU[UgUgklUl%m"!..')||**&"="==(((
$
 &(YY**9K9KvOjOj9j&
" f&8&8F<W<W&W]c]r]rszz&"<"<=r?   c                    	 | j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df   |d d d d df   z
        }| j	                  |d d d d df   |d d d d df   z
        }	t        j                  ||||||	gd      }
| j                  |
      }
| j                  |      }|
|z   }
| j                  |
      }
| j                  |
      }
|
S # t        $ r}t        d      |d }~ww xY w)Nr   r      r	   z;The `bbox` coordinate values should be within 0-1000 range.r!   rQ   )rh   ri   
IndexErrorrj   rk   r6   catro   rm   r0   r4   )r;   bboxr    left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingserj   rk   spatial_position_embeddingsrm   s               r>   rO   zLiltLayoutEmbeddings.forward   sw   	c'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y&+ii()))%% 
'
# '+&@&@A\&]#"&">">|"L&AD[&[#&*nn5P&Q#&*ll3N&O#**3  	cZ[abb	cs   A,D& &	E /D;;E NN)r\   r]   r^   r&   rO   r_   r`   s   @r>   rb   rb      s    >*+r?   rb   c                   6     e Zd Zd fd	ZddZ	 	 	 ddZ xZS )LiltSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j                  |j                  |j                  z  | j                  |j                  z        | _        t        j$                  |j&                        | _        |xs t+        |dd      | _        | j,                  dk(  s| j,                  d	k(  rF|j.                  | _        t        j0                  d
|j.                  z  dz
  | j                        | _        |j                  | _        || _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r#   r$   relative_keyrelative_key_queryrq   r   )r%   r&   r)   num_attention_headshasattr
ValueErrorrT   attention_head_sizeall_head_sizer   rn   querykeyvaluerl   layout_query
layout_keylayout_valuer2   attention_probs_dropout_probr4   r9   r#   r,   r'   distance_embedding	layer_idx)r;   r<   r#   r   r=   s       r>   r&   zLiltSelfAttention.__init__   sU    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
II&"="==t?Q?QU[UpUp?p
 ))&"="==t?Q?QU[UpUp?p
 II&"="==t?Q?QU[UpUp?p
 zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD#$*$?$?!"r?   c                     |j                         d d | j                  | j                  |z  fz   } |j                  | }|j	                  dddd      S )Nr!   r   rq   r   r	   )rG   r   r   viewpermute)r;   xrnew_x_shapes       r>   transpose_for_scoresz&LiltSelfAttention.transpose_for_scores   sT    ffhsmt'?'?AYAY]^A^&__AFFK yyAq!$$r?   c                    | j                  | j                  |      | j                        }| j                  | j                  |      | j                        }| j                  | j	                  |      | j                        }| j                  |      }	| j                  | j                  |            }
| j                  | j                  |            }| j                  |	      }t        j                  ||
j                  dd            }t        j                  ||j                  dd            }| j                  dk(  s| j                  dk(  rF|j                         d   }t        j                  |t        j                  |j                        j!                  dd      }t        j                  |t        j                  |j                        j!                  dd      }||z
  }| j#                  || j$                  z   dz
        }|j'                  |j(                        }| j                  dk(  rt        j*                  d	||      }||z   }nE| j                  dk(  r6t        j*                  d	||      }t        j*                  d
|
|      }||z   |z   }|t-        j.                  | j0                        z  }|t-        j.                  | j0                  | j                  z        z  }||z   }||z   }|||z   } t3        j4                  d      |      }| j7                  |      }|||z  }t        j                  ||      }|j9                  dddd      j;                         }|j                         d d | j<                  | j                  z  fz   } |j                   | }|||z   } t3        j4                  d      |      }| j7                  |      }|||z  }t        j                  ||      }|j9                  dddd      j;                         }|j                         d d | j<                  fz   } |j                   | }|r||f|f}|S ||ff}|S )N)r   r!   r   r   r   rA   )rB   zbhld,lrd->bhlrzbhrd,lrd->bhlrrQ   r   rq   r	   )r   r   rl   r   r   r   r   r   r6   matmul	transposer#   rG   r7   rI   rC   r   r   r,   rE   rB   einsummathsqrtr   r   Softmaxr4   r   
contiguousr   )r;   hidden_stateslayout_inputsattention_mask	head_maskoutput_attentionslayout_value_layerlayout_key_layerlayout_query_layermixed_query_layer	key_layervalue_layerquery_layerattention_scoreslayout_attention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keytmp_attention_scorestmp_layout_attention_scoreslayout_attention_probslayout_context_layernew_context_layer_shapeattention_probscontext_layeroutputss                                  r>   rO   zLiltSelfAttention.forward   sd    "66t7H7H7W[_[t[t6u44T__]5SW[WpWp4q!66t7H7H7W[_[t[t6u JJ}5--dhh}.EF	//

=0IJ//0AB <<Y5H5HR5PQ"',,/ACSC]C]^`bdCe"f''>9T=Y=Y]q=q&++-a0J"\\*EJJ}OcOcdiijlnopN"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s /$))D<T<T2UU&=		$$(A(AAA
 '
# 02MM"=@T"T%&=&N# "4!34K!L "&.D!E  %;i%G"$||,BDVW3;;Aq!QGRRT"6";";"=cr"BdFXFX\`\u\uFuEw"w8388:QR%/.@ -"**,-=> ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CD ! 12OD 	  !"679 	 r?   r{   )r   NNF)r\   r]   r^   r&   r   rO   r_   r`   s   @r>   r}   r}      s    ##J% \r?   r}   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LiltSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr   )r%   r&   r   rn   r)   denser0   r1   r2   r3   r4   r:   s     r>   r&   zLiltSelfOutput.__init__H  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r?   r   input_tensorreturnc                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r4   r0   r;   r   r   s      r>   rO   zLiltSelfOutput.forwardN  7    

=1]3}|'CDr?   r\   r]   r^   r&   r6   TensorrO   r_   r`   s   @r>   r   r   G  1    >U\\  RWR^R^ r?   r   c                        e Zd Zd
 fd	Zd Z	 	 	 ddej                  dej                  deej                     deej                     dee	   de
ej                     fd	Z xZS )LiltAttentionc                    t         |           t        |||      | _        t	        |      | _        t               | _        |j                  }|j                  |j                  z  |_        t	        |      | _
        ||_        y )N)r#   r   )r%   r&   r}   r;   r   outputsetpruned_headsr)   rl   layout_output)r;   r<   r#   r   ori_hidden_sizer=   s        r>   r&   zLiltAttention.__init__V  so    %fF]irs	$V,E ,,#//63N3NN+F3,r?   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   rQ   )lenr   r;   r   r   r   r   r   r   r   r   r   r   union)r;   headsindexs      r>   prune_headszLiltAttention.prune_headsb  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r?   r   r   r   r   r   r   c                     | j                  |||||      }| j                  |d   d   |      }| j                  |d   d   |      }||ff|dd  z   }	|	S )Nr   r   )r;   r   r   )
r;   r   r   r   r   r   self_outputsattention_outputlayout_attention_outputr   s
             r>   rO   zLiltAttention.forwardt  sz     yy
  ;;|Aq'9=I"&"4"4\!_Q5G"W$&=>@<PQPRCSSr?   r{   r   )r\   r]   r^   r&   r   r6   r   r   FloatTensorbooltuplerO   r_   r`   s   @r>   r   r   U  s    	-;, 7;15,1|| || !!2!23	
 E--. $D> 
u||	r?   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LiltIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r%   r&   r   rn   r)   intermediate_sizer   
isinstance
hidden_actstrr
   intermediate_act_fnr:   s     r>   r&   zLiltIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r?   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   )r;   r   s     r>   rO   zLiltIntermediate.forward  s&    

=100?r?   r   r`   s   @r>   r   r     s#    9U\\ ell r?   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
LiltOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r%   r&   r   rn   r   r)   r   r0   r1   r2   r3   r4   r:   s     r>   r&   zLiltOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r?   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r>   rO   zLiltOutput.forward  r   r?   r   r`   s   @r>   r   r     r   r?   r   c                        e Zd Zd fd	Z	 	 	 ddej
                  dej
                  deej                     deej                     dee   de	ej
                     fdZ
d	 Zd
 Z xZS )	LiltLayerc                    t         |           |j                  | _        d| _        t	        ||      | _        t        |      | _        t        |      | _	        |j                  }|j                  }|j                  |j                  z  |_
        |j                  |j                  z  |_        t        |      | _        t        |      | _        ||_
        ||_        y )Nr   r   )r%   r&   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r)   r   rl   layout_intermediater   )r;   r<   r   r   ori_intermediate_sizer=   s        r>   r&   zLiltLayer.__init__  s    '-'E'E$&vC,V4 ( ,, & 8 8#//63N3NN#)#;#;v?Z?Z#Z #3F#; '/,#8 r?   r   r   r   r   r   r   c                    | j                  |||||      }|d   d   }|d   d   }|dd  }	t        | j                  | j                  | j                  |      }
t        | j
                  | j                  | j                  |      }|
|ff|	z   }	|	S )N)r   r   r   )r   r   feed_forward_chunkr   r   layout_feed_forward_chunk)r;   r   r   r   r   r   self_attention_outputsr   r   r   layer_outputlayout_layer_outputs               r>   rO   zLiltLayer.forward  s     "&/ "0 "
 2!4Q7"8";A">(,0##T%A%A4CSCSUe
 8**D,H,H$JZJZ\s
 !"5687Br?   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   r;   r   intermediate_outputr   s       r>   r   zLiltLayer.feed_forward_chunk  s,    "//0@A{{#68HIr?   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   r  s       r>   r   z#LiltLayer.layout_feed_forward_chunk  s.    "667GH))*=?OPr?   r   r   )r\   r]   r^   r&   r6   r   r   r   r   r   rO   r   r   r_   r`   s   @r>   r   r     s    9* 7;15,1|| || !!2!23	
 E--. $D> 
u||	>
r?   r   c                        e Zd Zd fd	Z	 	 	 	 	 ddej
                  dej
                  deej                     deej                     dee   dee   dee   d	e	e
ej
                     ef   fd
Z xZS )LiltEncoderc           	          t         |           || _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        d| _	        y c c}w )Nr   F)
r%   r&   r<   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r;   r<   r   ir=   s       r>   r&   zLiltEncoder.__init__  sQ    ]]ERXRjRjLk#lqIf$B#lm
&+# $ms   A%r   r   r   r   r   output_hidden_statesreturn_dictr   c                     |rdnd }|rdnd }	t        | j                        D ]>  \  }
}|r||fz   }|||
   nd } ||||||      }|d   d   }|d   d   }|s6|	|d   fz   }	@ |r||fz   }|st        d |||	fD              S t        |||	      S )N r   r   c              3   $   K   | ]  }|| 
 y wr   r  ).0vs     r>   	<genexpr>z&LiltEncoder.forward.<locals>.<genexpr>  s      
 = s   )last_hidden_stater   
attentions)	enumerater  r   r   )r;   r   r   r   r   r   r  r  all_hidden_statesall_self_attentionsr  layer_modulelayer_head_masklayer_outputss                 r>   rO   zLiltEncoder.forward  s     #7BD$5b4(4 	POA|#$58H$H!.7.CilO(!M *!,Q/M)!,Q/M &9]1=M<O&O#%	P(   1]4D D  "%'   ++*
 	
r?   r   )NNFFT)r\   r]   r^   r&   r6   r   r   r   r   r   r   r   rO   r_   r`   s   @r>   r  r    s    , 7;15,1/4&*2
||2
 ||2
 !!2!23	2

 E--.2
 $D>2
 'tn2
 d^2
 
uU\\"O3	42
r?   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
LiltPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r%   r&   r   rn   r)   r   Tanh
activationr:   s     r>   r&   zLiltPooler.__init__#  s9    YYv1163E3EF
'')r?   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S Nr   )r   r"  )r;   r   first_token_tensorpooled_outputs       r>   rO   zLiltPooler.forward(  s6     +1a40

#566r?   r   r`   s   @r>   r  r  "  s#    $
U\\ ell r?   r  c                   *    e Zd ZU eed<   dZdZg Zd Zy)LiltPreTrainedModelr<   liltTc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsg        )meanstdNg      ?)r   r   rn   weightdatanormal_r<   initializer_rangebiaszero_r'   r   r0   fill_)r;   modules     r>   _init_weightsz!LiltPreTrainedModel._init_weights8  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .r?   N)	r\   r]   r^   r   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modulesr5  r  r?   r>   r(  r(  1  s    &*#*r?   r(  c                   r    e Zd Zd fd	Zd Zd Zd Ze	 	 	 	 	 	 	 	 	 	 ddee	j                     dee	j                     dee	j                     dee	j                     d	ee	j                     d
ee	j                     dee	j                     dee   dee   dee   deee	j                     ef   fd       Z xZS )	LiltModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        |rt        |      nd| _
        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)r%   r&   r<   r   rN   rb   layout_embeddingsr  encoderr  pooler	post_init)r;   r<   add_pooling_layerr=   s      r>   r&   zLiltModel.__init__K  sZ    
 	 ,V4!5f!="6*,=j(4 	r?   c                 .    | j                   j                  S r   rN   r+   )r;   s    r>   get_input_embeddingszLiltModel.get_input_embeddings\  s    ...r?   c                 &    || j                   _        y r   rC  )r;   r   s     r>   set_input_embeddingszLiltModel.set_input_embeddings_  s    */'r?   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr>  r  r   r   )r;   heads_to_pruner  r   s       r>   _prune_headszLiltModel._prune_headsb  sE    
 +002 	CLE5LLu%//;;EB	Cr?   rJ   rt   r   rK   r    r   rL   r   r  r  r   c           	         ||n| j                   j                  }|	|	n| j                   j                  }	|
|
n| j                   j                  }
||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      |\  }}||j                  n|j                  }|)t        j                  |dz   t        j                  |      }|t        j                  ||f|      }|pt        | j                  d      r4| j                  j                  ddd|f   }|j                  ||      }|}n&t        j                  |t        j                  |      }| j!                  ||      }| j#                  || j                   j$                        }| j                  ||||	      \  }}| j'                  ||
      }| j)                  ||||||	|
      }|d   }| j*                  | j+                  |      nd}|
s
||f|dd z   S t-        |||j.                  |j0                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModel
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModel.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer!   z5You have to specify either input_ids or inputs_embeds)   rA   )rC   rK   )rJ   r    rK   rL   )rt   r    )r   r   r   r  r  r   r   )r  pooler_outputr   r  )r<   r   r  use_return_dictr   %warn_if_padding_and_no_attention_maskrG   rC   r6   rH   rI   onesr   rN   rK   r8   get_extended_attention_maskget_head_maskr
  r=  r>  r?  r   r   r  )r;   rJ   rt   r   rK   r    r   rL   r   r  r  rM   
batch_sizer   rC   buffered_token_type_ids buffered_token_type_ids_expandedextended_attention_maskembedding_outputlayout_embedding_outputencoder_outputssequence_outputr&  s                          r>   rO   zLiltModel.forwardj  ss   P 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T<;;{T1FSD!"ZZ*j)A6RN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_al0m &&y$++2O2OP	)-%)'	 *9 *
&, #'"8"8dQ]"8"^,,#2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r?   )T)
NNNNNNNNNN)r\   r]   r^   r&   rD  rF  rJ  r   r   r6   r   r   r   r   r   rO   r_   r`   s   @r>   r;  r;  I  s$   "/0C  -1'+1515/3,004,0/3&*p
ELL)p
 u||$p
 !.	p

 !.p
 u||,p
 ELL)p
  -p
 $D>p
 'tnp
 d^p
 
uU\\"$>>	?p
 p
r?   r;  z
    LiLT Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                   ~    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee
   dee
   dee
   deeej                     ef   fd       Z xZS )LiltForSequenceClassificationc                     t         |   |       |j                  | _        || _        t	        |d      | _        t        |      | _        | j                          y NF)rA  )	r%   r&   
num_labelsr<   r;  r)  LiltClassificationHead
classifierr@  r:   s     r>   r&   z&LiltForSequenceClassification.__init__  sJ      ++f>	08 	r?   rJ   rt   r   rK   r    r   rL   labelsr   r  r  r   c                 V   ||n| j                   j                  }| j                  ||||||||	|
|
      }|d   }| j                  |      }d}||j	                  |j
                        }| j                   j                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j                  dk(  rt!               } |||      }|s|f|d	d z   }||f|z   S |S t#        |||j$                  |j&                  
      S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForSequenceClassification
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> predicted_class_idx = outputs.logits.argmax(-1).item()
        >>> predicted_class = model.config.id2label[predicted_class_idx]
        ```N	rt   r   rK   r    r   rL   r   r  r  r   r   
regressionsingle_label_classificationmulti_label_classificationr!   rq   losslogitsr   r  )r<   rN  r)  rb  rE   rC   problem_typer`  rB   r6   rI   rT   r   squeezer   r   r   r   r   r  r;   rJ   rt   r   rK   r    r   rL   rc  r   r  r  r   rZ  rk  rj  loss_fctr   s                     r>   rO   z%LiltForSequenceClassification.forward  s   \ &1%<k$++B]B]))))%'/!5#  
 "!*1YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r?   NNNNNNNNNNN)r\   r]   r^   r&   r   r   r6   
LongTensorr   r   r   r   r   r   rO   r_   r`   s   @r>   r]  r]    s9   	  15'+6:59371559-1,0/3&*`
E,,-`
 u||$`
 !!2!23	`

 !!1!12`
 u//0`
 E--.`
   1 12`
 ))*`
 $D>`
 'tn`
 d^`
 
uU\\"$<<	=`
 `
r?   r]  c                   ~    e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
ee	   dee	   dee	   de
eej                     ef   fd       Z xZS )LiltForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y r_  )r%   r&   r`  r;  r)  classifier_dropoutr3   r   r2   r4   rn   r)   rb  r@  r;   r<   ru  r=   s      r>   r&   z#LiltForTokenClassification.__init__X  s      ++f>	)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	r?   rJ   rt   r   rK   r    r   rL   rc  r   r  r  r   c                    ||n| j                   j                  }| j                  ||||||||	|
|
      }|d   }| j                  |      }| j	                  |      }d}|W|j                  |j                        }t               } ||j                  d| j                        |j                  d            }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForTokenClassification
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForTokenClassification.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> predicted_class_indices = outputs.logits.argmax(-1)
        ```Nre  r   r!   rq   ri  )r<   rN  r)  r4   rb  rE   rC   r   r   r`  r   r   r  rn  s                     r>   rO   z"LiltForTokenClassification.forwardf  s   V &1%<k$++B]B]))))%'/!5#  
 "!*,,71YYv}}-F')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r?   rp  )r\   r]   r^   r&   r   r   r6   rq  r   r   r   r   r   r   rO   r_   r`   s   @r>   rs  rs  U  s;     15+/6:59371559-1,0/3&*N
E,,-N
 u''(N
 !!2!23	N

 !!1!12N
 u//0N
 E--.N
   1 12N
 ))*N
 $D>N
 'tnN
 d^N
 
uU\\"$99	:N
 N
r?   rs  c                   (     e Zd ZdZ fdZd Z xZS )ra  z-Head for sentence-level classification tasks.c                 Z   t         |           t        j                  |j                  |j                        | _        |j                  |j                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        y r   )r%   r&   r   rn   r)   r   ru  r3   r2   r4   r`  out_projrv  s      r>   r&   zLiltClassificationHead.__init__  s    YYv1163E3EF
)/)B)B)NF%%TZTnTn 	 zz"45		&"4"4f6G6GHr?   c                     |d d dd d f   }| j                  |      }| j                  |      }t        j                  |      }| j                  |      }| j	                  |      }|S r$  )r4   r   r6   tanhrz  )r;   featureskwargsr   s       r>   rO   zLiltClassificationHead.forward  sY    Q1WLLOJJqMJJqMLLOMM!r?   )r\   r]   r^   __doc__r&   rO   r_   r`   s   @r>   ra  ra    s    7Ir?   ra  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     dee	   dee	   dee	   de
eej                     ef   fd       Z xZS )LiltForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r_  )
r%   r&   r`  r;  r)  r   rn   r)   
qa_outputsr@  r:   s     r>   r&   z!LiltForQuestionAnswering.__init__  sU      ++f>	))F$6$68I8IJ 	r?   rJ   rt   r   rK   r    r   rL   start_positionsend_positionsr   r  r  r   c                 *   ||n| j                   j                  }| j                  ||||||||
||
      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}||	t        |j                               dkD  r|j                  d      }t        |	j                               dkD  r|	j                  d      }	|j                  d      }|j                  d|      }|	j                  d|      }	t        |      } |||      } |||	      }||z   dz  }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                  	      S )
a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, AutoModelForQuestionAnswering
        >>> from datasets import load_dataset

        >>> tokenizer = AutoTokenizer.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")
        >>> model = AutoModelForQuestionAnswering.from_pretrained("SCUT-DLVCLab/lilt-roberta-en-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
        >>> example = dataset[0]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(words, boxes=boxes, return_tensors="pt")

        >>> outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> predicted_answer = tokenizer.decode(predict_answer_tokens)
        ```Nre  r   r   r!   rQ   )ignore_indexrq   )rj  start_logits
end_logitsr   r  )r<   rN  r)  r  splitrm  r   r   rG   clampr   r   r   r  )r;   rJ   rt   r   rK   r    r   rL   r  r  r   r  r  r   rZ  rk  r  r  
total_lossignored_indexro  
start_lossend_lossr   s                           r>   rO   z LiltForQuestionAnswering.forward  s   ^ &1%<k$++B]B]))))%'/!5#  
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r?   )NNNNNNNNNNNN)r\   r]   r^   r&   r   r   r6   rq  r   r   r   r   r   r   rO   r_   r`   s   @r>   r  r    sT     15+/6:593715596:48,0/3&*_
E,,-_
 u''(_
 !!2!23	_

 !!1!12_
 u//0_
 E--._
   1 12_
 "%"2"23_
   0 01_
 $D>_
 'tn_
 d^_
 
uU\\"$@@	A_
 _
r?   r  )r  r]  rs  r;  r(  )6r  r   typingr   r   r6   torch.utils.checkpointr   torch.nnr   r   r   activationsr
   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   configuration_liltr   
get_loggerr\   loggerModuler   rb   r}   r   r   r   r   r   r  r  r(  r;  r]  rs  ra  r  __all__r  r?   r>   <module>r     s     "    A A ! 9  . l l , * 
		H	%V= V=r5+299 5+pG		 GVRYY 1BII 1jryy   9* 9x:
")) :
|  */ * *. Q
# Q
 Q
h n
$7 n
n
b _
!4 _
 _
FRYY , l
2 l
 l
^r?   