
    rh                     ~   d Z ddlmZmZmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZmZ ddlmZmZmZ ddlmZm Z m!Z! ddl"m#Z#  e!jH                  e%      Z&ejN                  Z( G d dejR                        Z*	 	 d@dejR                  dejV                  dejV                  dejV                  deejV                     de,de,deejV                     fdZ- G d dejR                        Z. G d dejR                        Z/ G d dejR                        Z0 G d  d!ejR                        Z1 G d" d#ejR                        Z2 G d$ d%e      Z3 G d& d'ejR                        Z4 G d( d)ejR                        Z5 G d* d+ejR                        Z6 G d, d-ejR                        Z7 G d. d/ejR                        Z8e G d0 d1e             Z9e G d2 d3e9             Z:e G d4 d5e9             Z; ed67       G d8 d9e9             Z< ed:7       G d; d<e9             Z=e G d= d>e9             Z>g d?Z?y)AzPyTorch LayoutLM model.    )CallableOptionalUnionN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )LayoutLMConfigc                   4     e Zd ZdZ fdZ	 	 	 	 	 ddZ xZS )LayoutLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                    t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t        j                  |j                  |j
                        | _        t#        |j
                  |j$                        | _        t        j(                  |j*                        | _        | j/                  dt1        j2                  |j                        j5                  d      d       y )N)padding_idxepsposition_ids)r   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsmax_2d_position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingstype_vocab_sizetoken_type_embeddingsLayoutLMLayerNormlayer_norm_eps	LayerNormDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/layoutlm/modeling_layoutlm.pyr'   zLayoutLMEmbeddings.__init__1   s[   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2S2SU[UgUg%h"%'\\&2H2H&J\J\%]"*6+=+=6CXCXYzz&"<"<=ELL)G)GHOOPWXej 	 	
    c                    ||j                         }n|j                         d d }|d   }||j                  n|j                  }|| j                  d d d |f   }|&t        j                  |t        j
                  |      }|| j                  |      }|}	| j                  |      }
	 | j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df         }| j                  |d d d d df   |d d d d df   z
        }| j                  |d d d d df   |d d d d df   z
        }| j                  |      }|	|
z   |z   |z   |z   |z   |z   |z   |z   }| j                  |      }| j                  |      }|S # t        $ r}t        d      |d }~ww xY w)Nr$   r   dtypedevicer      r
   z:The `bbox`coordinate values should be within 0-1000 range.)sizerI   r#   r=   zeroslongr,   r.   r0   r1   
IndexErrorr2   r3   r5   r8   r;   )rA   	input_idsbboxtoken_type_idsr#   inputs_embedsinput_shape
seq_lengthrI   words_embeddingsr.   left_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingser2   r3   r5   
embeddingss                       rD   forwardzLayoutLMEmbeddings.forwardB   s1     #..*K',,.s3K ^
%.%:!!@T@T,,Q^<L!"[[EJJvVN  00;M("66|D	b'+'A'A$q!Qw-'P$(,(B(B41a=(Q%(,(B(B41a=(Q%(,(B(B41a=(Q% !% : :41a=4PQSTVWPW=;X Y $ : :41a=4PQSTVWPW=;X Y $ : :> J !"&' (( (	(
 (( $$ $$ $$ 	 ^^J/
\\*-
)  	bYZ`aa	bs   ,A,F7 7	G GG)NNNNN)__name__
__module____qualname____doc__r'   r\   __classcell__rC   s   @rD   r   r   .   s!    Q
& 5rE   r   modulequerykeyvalueattention_maskscalingr;   	head_maskc                 .   t        j                  ||j                  dd            |z  }	|#|d d d d d d d |j                  d   f   }
|	|
z   }	t        j
                  j                  |	dt         j                        j                  |j                        }	t        j
                  j                  |	|| j                        }	||	|j                  dddd      z  }	t        j                  |	|      }|j                  dd      j                         }||	fS )NrJ   r
   r$   )dimrH   )ptrainingr   )r=   matmul	transposeshaper   
functionalsoftmaxfloat32torH   r;   rn   view
contiguous)rc   rd   re   rf   rg   rh   r;   ri   kwargsattn_weightscausal_maskattn_outputs               rD   eager_attention_forwardr|   {   s     <<s}}Q':;gEL!$Q1o		"o%=>#k1==((2U]](SVVW\WbWbcL==((6??([L#innQAq&AA,,|U3K''1-88:K$$rE   c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	LayoutLMSelfAttentionc                 $   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |j                   | _        | j                  dz  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r&   r'   r*   num_attention_headshasattr
ValueErrorrB   intattention_head_sizeall_head_sizer   Linearrd   re   rf   r9   attention_probs_dropout_probr;   attention_dropoutrh   r@   s     rD   r'   zLayoutLMSelfAttention.__init__   sC    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rE   hidden_statesrg   ri   output_attentionsreturnc                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
t        }| j                  j                  dk7  rt        | j                  j                     } || ||	|
|f| j                  sdn| j                  | j                  |d|\  }} |j                  g |d j                         }|r||f}|S |f}|S )Nr$   r   rJ   eager        )r;   rh   ri   )rq   r   rd   rv   rp   re   rf   r|   rB   _attn_implementationr   rn   r   rh   reshaperw   )rA   r   rg   ri   r   rx   rS   hidden_shapequery_states
key_statesvalue_statesattention_interfacer{   ry   outputss                  rD   r\   zLayoutLMSelfAttention.forward   sa    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
!\ *k));;;;FFH1B;- JUrE   NNF)r]   r^   r_   r'   r=   Tensorr   FloatTensorbooltupler\   ra   rb   s   @rD   r~   r~      so    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	!rE   r~   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LayoutLMSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr!   )r&   r'   r   r   r*   denser8   r7   r9   r:   r;   r@   s     rD   r'   zLayoutLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rE   r   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r;   r8   rA   r   r   s      rD   r\   zLayoutLMSelfOutput.forward   7    

=1]3}|'CDrE   r]   r^   r_   r'   r=   r   r\   ra   rb   s   @rD   r   r      1    >U\\  RWR^R^ rE   r   c                        e Zd Z fdZd Z	 	 	 d	dej                  deej                     deej                     dee	   de
ej                     f
dZ xZS )
LayoutLMAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y r   )r&   r'   r~   rA   r   outputsetpruned_headsr@   s     rD   r'   zLayoutLMAttention.__init__   s0    )&1	(0ErE   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   rl   )lenr   rA   r   r   r   r   rd   re   rf   r   r   r   union)rA   headsindexs      rD   prune_headszLayoutLMAttention.prune_heads   s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rE   r   rg   ri   r   r   c                 p     | j                   |f|||d|}| j                  |d   |      }|f|dd  z   }|S N)rg   ri   r   r   r   )rA   r   )	rA   r   rg   ri   r   rx   self_outputsattention_outputr   s	            rD   r\   zLayoutLMAttention.forward   s_     !tyy
)/	

 
  ;;|AF#%QR(88rE   r   )r]   r^   r_   r'   r   r=   r   r   r   r   r   r\   ra   rb   s   @rD   r   r      st    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	rE   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r&   r'   r   r   r*   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnr@   s     rD   r'   zLayoutLMIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$rE   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   rA   r   s     rD   r\   zLayoutLMIntermediate.forward  s&    

=100?rE   r   rb   s   @rD   r   r     s#    9U\\ ell rE   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )LayoutLMOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r&   r'   r   r   r   r*   r   r8   r7   r9   r:   r;   r@   s     rD   r'   zLayoutLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rE   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      rD   r\   zLayoutLMOutput.forward&  r   rE   r   rb   s   @rD   r   r     r   rE   r   c                        e Zd Z fdZ	 	 	 d	dej
                  deej                     deej                     dee   de	ej
                     f
dZ
d Z xZS )
LayoutLMLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y )Nr   )
r&   r'   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r@   s     rD   r'   zLayoutLMLayer.__init__/  sI    '-'E'E$*6208$V,rE   r   rg   ri   r   r   c                      | j                   |f|||d|}|d   }|dd  }t        | j                  | j                  | j                  |      }	|	f|z   }|S r   )r   r   feed_forward_chunkr   r   )
rA   r   rg   ri   r   rx   self_attention_outputsr   r   layer_outputs
             rD   r\   zLayoutLMLayer.forward7  s     "0"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+rE   c                 L    | j                  |      }| j                  ||      }|S r   )r   r   )rA   r   intermediate_outputr   s       rD   r   z LayoutLMLayer.feed_forward_chunkP  s,    "//0@A{{#68HIrE   r   )r]   r^   r_   r'   r=   r   r   r   r   r   r\   r   ra   rb   s   @rD   r   r   .  st    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2rE   r   c                        e Zd Z fdZe	 	 	 	 	 d
dej                  deej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd	       Z xZS )LayoutLMEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r&   r'   rB   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)rA   rB   irC   s      rD   r'   zLayoutLMEncoder.__init__X  sN    ]]5IaIaCb#caM&$9#cd
&+# $ds   A#r   rg   ri   r   output_hidden_statesreturn_dictr   c           	          |rdnd }|rdnd }	t        | j                        D ]4  \  }
}|r||fz   }|||
   nd } |d||||d|}|d   }|s,|	|d   fz   }	6 |r||fz   }t        |||	      S )N )r   rg   ri   r   r   r   )last_hidden_stater   
attentions)	enumerater   r   )rA   r   rg   ri   r   r   r   rx   all_hidden_statesall_self_attentionsr   layer_modulelayer_head_masklayer_outputss                 rD   r\   zLayoutLMEncoder.forward^  s     #7BD$5b4(4 	POA|#$58H$H!.7.CilO( +-)"3	
 M *!,M &9]1=M<O&O#!	P$   1]4D D++*
 	
rE   )NNFFT)r]   r^   r_   r'   r   r=   r   r   r   r   r   r   r   r\   ra   rb   s   @rD   r   r   W  s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
rE   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r&   r'   r   r   r*   r   Tanh
activationr@   s     rD   r'   zLayoutLMPooler.__init__  s9    YYv1163E3EF
'')rE   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r   )rA   r   first_token_tensorpooled_outputs       rD   r\   zLayoutLMPooler.forward  s6     +1a40

#566rE   r   rb   s   @rD   r   r     s#    $
U\\ ell rE   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )r&   r'   r   r   r*   r   r   r   r   r   transform_act_fnr8   r7   r@   s     rD   r'   z(LayoutLMPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrE   r   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   r8   r   s     rD   r\   z'LayoutLMPredictionHeadTransform.forward  s4    

=1--m<}5rE   r   rb   s   @rD   r   r     s$    UU\\ ell rE   r   c                   *     e Zd Z fdZd Zd Z xZS )LayoutLMLMPredictionHeadc                 H   t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)bias)r&   r'   r   	transformr   r   r*   r)   decoder	Parameterr=   rL   r   r@   s     rD   r'   z!LayoutLMLMPredictionHead.__init__  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrE   c                 :    | j                   | j                  _         y r   )r   r   rA   s    rD   _tie_weightsz%LayoutLMLMPredictionHead._tie_weights  s     IIrE   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     rD   r\   z LayoutLMLMPredictionHead.forward  s$    }5]3rE   )r]   r^   r_   r'   r  r\   ra   rb   s   @rD   r   r     s    &&rE   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )LayoutLMOnlyMLMHeadc                 B    t         |           t        |      | _        y r   )r&   r'   r   predictionsr@   s     rD   r'   zLayoutLMOnlyMLMHead.__init__  s    3F;rE   sequence_outputr   c                 (    | j                  |      }|S r   )r  )rA   r  prediction_scoress      rD   r\   zLayoutLMOnlyMLMHead.forward  s     ,,_=  rE   r   rb   s   @rD   r  r    s#    <!u|| ! !rE   r  c                   &    e Zd ZU eed<   dZdZd Zy)LayoutLMPreTrainedModelrB   layoutlmTc                 X   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t              rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r%|j                  j                  j                          yy)zInitialize the weightsr   )meanstdN      ?)r   r   r   weightdatanormal_rB   initializer_ranger   zero_r(   r    r6   fill_r   )rA   rc   s     rD   _init_weightsz%LayoutLMPreTrainedModel._init_weights  s$   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> . 12KK""$MM$$S) 89KK""$ :rE   N)r]   r^   r_   r   __annotations__base_model_prefixsupports_gradient_checkpointingr  r   rE   rD   r  r    s    "&*#%rE   r  c                   `    e Zd Z fdZd Zd Zd Zee	 	 	 	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     de	e
j                     d	e	e
j                     d
e	e
j                     de	e
j                     de	e   de	e   de	e   deeef   fd              Z xZS )LayoutLMModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        | j                          y r   )
r&   r'   rB   r   r[   r   encoderr   pooler	post_initr@   s     rD   r'   zLayoutLMModel.__init__  sG     ,V4&v.$V, 	rE   c                 .    | j                   j                  S r   r[   r,   r  s    rD   get_input_embeddingsz"LayoutLMModel.get_input_embeddings  s    ...rE   c                 &    || j                   _        y r   r#  )rA   rf   s     rD   set_input_embeddingsz"LayoutLMModel.set_input_embeddings  s    */'rE   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r   r   r   )rA   heads_to_pruner   r   s       rD   _prune_headszLayoutLMModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	CrE   rO   rP   rg   rQ   r#   ri   rR   r   r   r   r   c                    ||n| j                   j                  }|	|	n| j                   j                  }	|
|
n| j                   j                  }
||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      ||j                  n|j                  }|t        j                  ||      }|&t        j                  |t        j                  |      }|)t        j                  |dz   t        j                  |      }|j                  d      j                  d	      }|j                  | j                  
      }d|z
  t        j                  | j                        j                   z  }||j#                         dk(  rh|j                  d      j                  d      j                  d      j                  d      }|j%                  | j                   j&                  dddd      }nB|j#                         d	k(  r/|j                  d      j                  d      j                  d      }|j                  t)        | j+                               j                  
      }ndg| j                   j&                  z  }| j-                  |||||      }| j/                  |||||	d      }|d   }| j1                  |      }t3        |||j4                  |j6                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMModel
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> outputs = model(
        ...     input_ids=input_ids, bbox=bbox, attention_mask=attention_mask, token_type_ids=token_type_ids
        ... )

        >>> last_hidden_states = outputs.last_hidden_state
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer$   z5You have to specify either input_ids or inputs_embeds)rI   rG   )   r   rJ   )rH   r  r   )rO   rP   r#   rQ   rR   T)ri   r   r   r   )r   pooler_outputr   r   )rB   r   r   use_return_dictr   %warn_if_padding_and_no_attention_maskrK   rI   r=   onesrL   rM   	unsqueezeru   rH   finfominrl   r?   r   next
parametersr[   r  r   r   r   r   )rA   rO   rP   rg   rQ   r#   ri   rR   r   r   r   rS   rI   extended_attention_maskembedding_outputencoder_outputsr  r   s                     rD   r\   zLayoutLMModel.forward  s   j 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN<;;{T1FSD"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@EKKPTPZPZD[D_D_"_ }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??%)' + 
 ,,#/!5 ' 
 *!,O4)-')77&11	
 	
rE   )
NNNNNNNNNN)r]   r^   r_   r'   r$  r&  r*  r   r   r   r=   
LongTensorr   r   r   r   r   r\   ra   rb   s   @rD   r  r    s1   	/0C  15+/6:59371559,0/3&*s
E,,-s
 u''(s
 !!2!23	s

 !!1!12s
 u//0s
 E--.s
   1 12s
 $D>s
 'tns
 d^s
 
u00	1s
  s
rE   r  c                       e Zd ZddgZ fdZd Zd Zd Zee		 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeef   fd              Z xZS )LayoutLMForMaskedLMzcls.predictions.decoder.biaszcls.predictions.decoder.weightc                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )r&   r'   r  r  r  clsr!  r@   s     rD   r'   zLayoutLMForMaskedLM.__init__}  s4     %f-&v. 	rE   c                 B    | j                   j                  j                  S r   r  r[   r,   r  s    rD   r$  z(LayoutLMForMaskedLM.get_input_embeddings      }}''777rE   c                 B    | j                   j                  j                  S r   )r=  r  r   r  s    rD   get_output_embeddingsz)LayoutLMForMaskedLM.get_output_embeddings  s    xx##+++rE   c                     || j                   j                  _        |j                  | j                   j                  _        y r   )r=  r  r   r   )rA   new_embeddingss     rD   set_output_embeddingsz)LayoutLMForMaskedLM.set_output_embeddings  s,    '5$$2$7$7!rE   rO   rP   rg   rQ   r#   ri   rR   labelsr   r   r   r   c                 r   ||n| j                   j                  }| j                  ||||||||	|
d
      }|d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }t        |||j                  |j                        S )a2	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForMaskedLM
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "[MASK]"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])

        >>> labels = tokenizer("Hello world", return_tensors="pt")["input_ids"]

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=labels,
        ... )

        >>> loss = outputs.loss
        ```NT)rg   rQ   r#   ri   rR   r   r   r   r   r$   losslogitsr   r   )
rB   r.  r  r=  r   rv   r)   r   r   r   )rA   rO   rP   rg   rQ   r#   ri   rR   rF  r   r   r   r   r  r
  masked_lm_lossloss_fcts                    rD   r\   zLayoutLMForMaskedLM.forward  s    @ &1%<k$++B]B]--))%'/!5   
 "!* HH_5')H%!&&r4;;+A+ABBN
 $!//))	
 	
rE   NNNNNNNNNNN)r]   r^   r_   _tied_weights_keysr'   r$  rB  rE  r   r   r   r=   r9  r   r   r   r   r   r\   ra   rb   s   @rD   r;  r;  y  sT   8:Z[8,8  15+/6:59371559-1,0/3&*]
E,,-]
 u''(]
 !!2!23	]

 !!1!12]
 u//0]
 E--.]
   1 12]
 ))*]
 $D>]
 'tn]
 d^]
 
un$	%]
  ]
rE   r;  z
    LayoutLM Model with a sequence classification head on top (a linear layer on top of the pooled output) e.g. for
    document image classification tasks such as the [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    )custom_introc                   t    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     dee   dee   dee   deeef   fd              Z xZS )!LayoutLMForSequenceClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r   r&   r'   
num_labelsr  r  r   r9   r:   r;   r   r*   
classifierr!  r@   s     rD   r'   z*LayoutLMForSequenceClassification.__init__  i      ++%f-zz&"<"<=))F$6$68I8IJ 	rE   c                 B    | j                   j                  j                  S r   r?  r  s    rD   r$  z6LayoutLMForSequenceClassification.get_input_embeddings  r@  rE   rO   rP   rg   rQ   r#   ri   rR   rF  r   r   r   r   c                    ||n| j                   j                  }| j                  ||||||||	|
d
      }|d   }| j                  |      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |j                               }n |||      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               } |||      }t!        |||j"                  |j$                  	      S )
aB	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForSequenceClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForSequenceClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> sequence_label = torch.tensor([1])

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=sequence_label,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NT
rO   rP   rg   rQ   r#   ri   rR   r   r   r   r   
regressionsingle_label_classificationmulti_label_classificationr$   rH  )rB   r.  r  r;   rU  problem_typerT  rH   r=   rM   r   r	   squeezer   rv   r   r   r   r   )rA   rO   rP   rg   rQ   r#   ri   rR   rF  r   r   r   r   r   rJ  rI  rL  s                    rD   r\   z)LayoutLMForSequenceClassification.forward  s   @ &1%<k$++B]B]--))%'/!5   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rE   rM  )r]   r^   r_   r'   r$  r   r   r   r=   r9  r   r   r   r   r   r\   ra   rb   s   @rD   rQ  rQ    s?   8  15+/6:59371559-1,0/3&*n
E,,-n
 u''(n
 !!2!23	n

 !!1!12n
 u//0n
 E--.n
   1 12n
 ))*n
 $D>n
 'tnn
 d^n
 
u..	/n
  n
rE   rQ  a3  
    LayoutLM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    sequence labeling (information extraction) tasks such as the [FUNSD](https://guillaumejaume.github.io/FUNSD/)
    dataset and the [SROIE](https://rrc.cvc.uab.es/?ch=13) dataset.
    c                   t    e Zd Z fdZd Zee	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     dee   dee   dee   deeef   fd              Z xZS )LayoutLMForTokenClassificationc                 ,   t         |   |       |j                  | _        t        |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r   rS  r@   s     rD   r'   z'LayoutLMForTokenClassification.__init__  rV  rE   c                 B    | j                   j                  j                  S r   r?  r  s    rD   r$  z3LayoutLMForTokenClassification.get_input_embeddings  r@  rE   rO   rP   rg   rQ   r#   ri   rR   rF  r   r   r   r   c                    ||n| j                   j                  }| j                  ||||||||	|
d
      }|d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }t        |||j                  |j                        S )a  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForTokenClassification
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
        >>> model = LayoutLMForTokenClassification.from_pretrained("microsoft/layoutlm-base-uncased")

        >>> words = ["Hello", "world"]
        >>> normalized_word_boxes = [637, 773, 693, 782], [698, 773, 733, 782]

        >>> token_boxes = []
        >>> for word, box in zip(words, normalized_word_boxes):
        ...     word_tokens = tokenizer.tokenize(word)
        ...     token_boxes.extend([box] * len(word_tokens))
        >>> # add bounding boxes of cls + sep tokens
        >>> token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]

        >>> encoding = tokenizer(" ".join(words), return_tensors="pt")
        >>> input_ids = encoding["input_ids"]
        >>> attention_mask = encoding["attention_mask"]
        >>> token_type_ids = encoding["token_type_ids"]
        >>> bbox = torch.tensor([token_boxes])
        >>> token_labels = torch.tensor([1, 1, 0, 0]).unsqueeze(0)  # batch size of 1

        >>> outputs = model(
        ...     input_ids=input_ids,
        ...     bbox=bbox,
        ...     attention_mask=attention_mask,
        ...     token_type_ids=token_type_ids,
        ...     labels=token_labels,
        ... )

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTrY  r   r$   rH  )rB   r.  r  r;   rU  r   rv   rT  r   r   r   )rA   rO   rP   rg   rQ   r#   ri   rR   rF  r   r   r   r   r  rJ  rI  rL  s                    rD   r\   z&LayoutLMForTokenClassification.forward  s    | &1%<k$++B]B]--))%'/!5   
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
rE   rM  )r]   r^   r_   r'   r$  r   r   r   r=   r9  r   r   r   r   r   r\   ra   rb   s   @rD   r`  r`  y  s?   8  15+/6:59371559-1,0/3&*Z
E,,-Z
 u''(Z
 !!2!23	Z

 !!1!12Z
 u//0Z
 E--.Z
   1 12Z
 ))*Z
 $D>Z
 'tnZ
 d^Z
 
u++	,Z
  Z
rE   r`  c                       e Zd Zd fd	Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee   dee   dee   deeef   fd              Z xZS )LayoutLMForQuestionAnsweringc                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y)z
        has_visual_segment_embedding (`bool`, *optional*, defaults to `True`):
            Whether or not to add visual segment embeddings.
        N)
r&   r'   rT  r  r  r   r   r*   
qa_outputsr!  )rA   rB   has_visual_segment_embeddingrC   s      rD   r'   z%LayoutLMForQuestionAnswering.__init__  sU    
 	  ++%f-))F$6$68I8IJ 	rE   c                 B    | j                   j                  j                  S r   r?  r  s    rD   r$  z1LayoutLMForQuestionAnswering.get_input_embeddings  r@  rE   rO   rP   rg   rQ   r#   ri   rR   start_positionsend_positionsr   r   r   r   c                    ||n| j                   j                  }| j                  ||||||||
|d
      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}||	t        |j                               dkD  r|j                  d      }t        |	j                               dkD  r|	j                  d      }	|j                  d      }|j                  d|      }|	j                  d|      }	t        |      } |||      } |||	      }||z   d	z  }t        ||||j                  |j                  
      S )a4	  
        bbox (`torch.LongTensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner. See [Overview](#Overview) for normalization.

        Example:

        In the example below, we prepare a question + context pair for the LayoutLM model. It will give us a prediction
        of what it thinks the answer is (the span of the answer within the texts parsed from the image).

        ```python
        >>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
        >>> from datasets import load_dataset
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("impira/layoutlm-document-qa", add_prefix_space=True)
        >>> model = LayoutLMForQuestionAnswering.from_pretrained("impira/layoutlm-document-qa", revision="1e3ebac")

        >>> dataset = load_dataset("nielsr/funsd", split="train")
        >>> example = dataset[0]
        >>> question = "what's his name?"
        >>> words = example["words"]
        >>> boxes = example["bboxes"]

        >>> encoding = tokenizer(
        ...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
        ... )
        >>> bbox = []
        >>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
        ...     if s == 1:
        ...         bbox.append(boxes[w])
        ...     elif i == tokenizer.sep_token_id:
        ...         bbox.append([1000] * 4)
        ...     else:
        ...         bbox.append([0] * 4)
        >>> encoding["bbox"] = torch.tensor([bbox])

        >>> word_ids = encoding.word_ids(0)
        >>> outputs = model(**encoding)
        >>> loss = outputs.loss
        >>> start_scores = outputs.start_logits
        >>> end_scores = outputs.end_logits
        >>> start, end = word_ids[start_scores.argmax(-1)], word_ids[end_scores.argmax(-1)]
        >>> print(" ".join(words[start : end + 1]))
        M. Hamann P. Harper, P. Martinez
        ```NTrY  r   r   r$   r   )ignore_indexrJ   )rI  start_logits
end_logitsr   r   )rB   r.  r  rg  splitr^  rw   r   rK   clampr   r   r   r   )rA   rO   rP   rg   rQ   r#   ri   rR   rj  rk  r   r   r   r   r  rJ  rn  ro  
total_lossignored_indexrL  
start_lossend_losss                          rD   r\   z$LayoutLMForQuestionAnswering.forward   s   D &1%<k$++B]B]--))%'/!5   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J+%!!//))
 	
rE   )T)NNNNNNNNNNNN)r]   r^   r_   r'   r$  r   r   r   r=   r9  r   r   r   r   r   r\   ra   rb   s   @rD   re  re    sX   8  15+/6:593715596:48,0/3&*m
E,,-m
 u''(m
 !!2!23	m

 !!1!12m
 u//0m
 E--.m
   1 12m
 "%"2"23m
   0 01m
 $D>m
 'tnm
 d^m
 
u22	3m
  m
rE   re  )r;  rQ  r`  re  r  r  )r   N)@r`   typingr   r   r   r=   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   pytorch_utilsr   r   r   utilsr   r   r   configuration_layoutlmr   
get_loggerr]   loggerr8   r6   Moduler   r   floatr|   r~   r   r   r   r   r   r   r   r   r   r  r  r  r;  rQ  r`  re  __all__r   rE   rD   <module>r     sy    , ,    A A ! 9  G l l > > 2 
		H	% LL I Ih (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v *		 *\299  RYY %. %R.
bii .
dRYY  bii $ryy 0!")) ! %o % %0 O
+ O
 O
d u
1 u
 u
p ~
(? ~
~
B j
%< j
j
Z A
#: A
 A
HrE   