
    rh                     x   d Z ddlZddlmZmZmZ ddlZddlZddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZ dd
lmZmZmZmZmZ ddlmZm Z m!Z! ddl"m#Z#  e!jH                  e%      Z& G d dejN                        Z(d@dZ) G d dejN                        Z* G d dejN                        Z+ G d dejN                        Z, G d dejN                        Z- G d dejN                        Z. G d dejN                        Z/ G d dejN                        Z0 G d d ejN                        Z1	 	 dAd!ejN                  d"ejd                  d#ejd                  d$ejd                  d%eejd                     d&e3d'e3d(eejd                     fd)Z4 G d* d+ejN                        Z5 G d, d-ejN                        Z6 G d. d/e      Z7 G d0 d1ejN                        Z8e G d2 d3e             Z9e G d4 d5e9             Z:e G d6 d7e9             Z; ed89       G d: d;e9             Z< ed<9       G d= d>e9             Z=g d?Z>y)BzPyTorch MarkupLM model.    N)CallableOptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModelapply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringcan_return_tuplelogging   )MarkupLMConfigc                   *     e Zd ZdZ fdZddZ xZS )XPathEmbeddingszConstruct the embeddings from xpath tags and subscripts.

    We drop tree-id in this version, as its info can be covered by xpath.
    c           	         t         |           |j                  | _        t        j                  |j
                  | j                  z  |j                        | _        t        j                  |j                        | _
        t        j                         | _        t        j                  |j
                  | j                  z  d|j                  z        | _        t        j                  d|j                  z  |j                        | _        t        j                  t!        | j                        D cg c],  }t        j"                  |j$                  |j
                        . c}      | _        t        j                  t!        | j                        D cg c],  }t        j"                  |j(                  |j
                        . c}      | _        y c c}w c c}w )N   )super__init__	max_depthr   Linearxpath_unit_hidden_sizehidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrange	Embeddingmax_xpath_tag_unit_embeddingsxpath_tag_sub_embeddingsmax_xpath_subs_unit_embeddingsxpath_subs_sub_embeddings)selfconfig_	__class__s      /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/markuplm/modeling_markuplm.pyr"   zXPathEmbeddings.__init__7   sZ   )))+63P3PSWSaSa3acicucu)v&zz&"<"<='')$&IIf.K.Kdnn.\^_bhbtbt^t$u!1v'9'9#96;M;MN(* t~~. VAA6C`C`a)
% *, t~~. VBBFDaDab*
&s   11G1Gc           	         g }g }t        | j                        D ]^  }|j                   | j                  |   |d d d d |f                |j                   | j                  |   |d d d d |f                ` t        j                  |d      }t        j                  |d      }||z   }| j                  | j                  | j                  | j                  |                        }|S )Ndim)r0   r#   appendr3   r5   torchcatr.   r*   r,   r-   )r6   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r:   forwardzXPathEmbeddings.forwardQ   s     " "t~~& 	eA!(()I)F)Fq)I.YZ\]_`Y`Ja)bc!(()J)G)G)J>Z[]^`aZaKb)cd	e !&		*?R H %		*?R H03HH>>$,,ttG`G`aqGr7s*tu    )NN)__name__
__module____qualname____doc__r"   rH   __classcell__r9   s   @r:   r   r   1   s    

4 rI   r   c                     | j                  |      j                         }t        j                  |d      j	                  |      |z   |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r   r=   )neintr@   cumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r:   "create_position_ids_from_input_idsr[   d   sW     <<$((*D <<!4<<TBE[[_cc##%33rI   c                   >     e Zd ZdZ fdZd Z	 	 	 	 	 	 	 ddZ xZS )MarkupLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 d   t         |           || _        t        j                  |j
                  |j                  |j                        | _        t        j                  |j                  |j                        | _
        |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        t        j                   |j                  |j"                        | _        t        j$                  |j&                        | _        | j+                  dt-        j.                  |j                        j1                  d      d       |j                  | _        t        j                  |j                  |j                  | j2                        | _
        y )N)rW   epsposition_ids)r   r<   F)
persistent)r!   r"   r7   r   r1   
vocab_sizer&   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr#   r   rG   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr(   r)   r*   register_bufferr@   arangeexpandrW   r6   r7   r9   s     r:   r"   zMarkupLMEmbeddings.__init__w   s:   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c )) / 7%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 "..#%<<**F,>,>DL\L\$
 rI   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr<   r   dtypedevicer   )sizer@   rm   rW   rU   rs   	unsqueezern   )r6   inputs_embedsinput_shapesequence_lengthra   s        r:   &create_position_ids_from_inputs_embedsz9MarkupLMEmbeddings.create_position_ids_from_inputs_embeds   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rI   c                    ||j                         }n|j                         d d }||j                  n|j                  }	|+|t        || j                  |      }n| j	                  |      }|&t        j                  |t
        j                  |	      }|| j                  |      }|]| j                  j                  t        j                  t        t        |      | j                  gz         t
        j                  |	      z  }|]| j                  j                  t        j                  t        t        |      | j                  gz         t
        j                  |	      z  }|}
| j!                  |      }| j#                  |      }| j%                  ||      }|
|z   |z   |z   }| j'                  |      }| j)                  |      }|S )Nr<   rq   )rt   rs   r[   rW   ry   r@   zerosrU   re   r7   
tag_pad_idonestuplelistr#   subs_pad_idrg   ri   rG   rj   r*   )r6   rV   rB   rC   token_type_idsra   rv   rX   rw   rs   words_embeddingsrg   ri   rG   
embeddingss                  r:   rH   zMarkupLMEmbeddings.forward   s     #..*K',,.s3K%.%:!!@T@T$A)TM]M]_uv#JJ=Y!"[[EJJvVN  00;M !![[33ejjd;'4>>*::;5::V\7 N !![[44uzzd;'4>>*::;5::V\8 N )"66|D $ : :> J00P%(;;>SSVff
^^J/
\\*-
rI   )NNNNNNr   )rJ   rK   rL   rM   r"   ry   rH   rN   rO   s   @r:   r]   r]   t   s,    Q
2=&  2rI   r]   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MarkupLMSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr_   )r!   r"   r   r$   r&   denserj   rk   r(   r)   r*   ro   s     r:   r"   zMarkupLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rI   hidden_statesinput_tensorreturnc                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r*   rj   r6   r   r   s      r:   rH   zMarkupLMSelfOutput.forward   7    

=1]3}|'CDrI   rJ   rK   rL   r"   r@   TensorrH   rN   rO   s   @r:   r   r      1    >U\\  RWR^R^ rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r!   r"   r   r$   r&   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnro   s     r:   r"   zMarkupLMIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$rI   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r6   r   s     r:   rH   zMarkupLMIntermediate.forward   s&    

=100?rI   r   rO   s   @r:   r   r      s#    9U\\ ell rI   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MarkupLMOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r!   r"   r   r$   r   r&   r   rj   rk   r(   r)   r*   ro   s     r:   r"   zMarkupLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rI   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r:   rH   zMarkupLMOutput.forward   r   rI   r   rO   s   @r:   r   r      r   rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r!   r"   r   r$   r&   r   Tanhr,   ro   s     r:   r"   zMarkupLMPooler.__init__  s9    YYv1163E3EF
'')rI   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r,   )r6   r   first_token_tensorpooled_outputs       r:   rH   zMarkupLMPooler.forward  s6     +1a40

#566rI   r   rO   s   @r:   r   r     s#    $
U\\ ell rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )r!   r"   r   r$   r&   r   r   r   r   r   transform_act_fnrj   rk   ro   s     r:   r"   z(MarkupLMPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrI   r   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   rj   r   s     r:   rH   z'MarkupLMPredictionHeadTransform.forward  s4    

=1--m<}5rI   r   rO   s   @r:   r   r     s$    UU\\ ell rI   r   c                   *     e Zd Z fdZd Zd Z xZS )MarkupLMLMPredictionHeadc                 H   t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)bias)r!   r"   r   	transformr   r$   r&   rc   decoder	Parameterr@   r{   r   ro   s     r:   r"   z!MarkupLMLMPredictionHead.__init__(  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrI   c                 :    | j                   | j                  _         y r   )r   r   r6   s    r:   _tie_weightsz%MarkupLMLMPredictionHead._tie_weights5  s     IIrI   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     r:   rH   z MarkupLMLMPredictionHead.forward8  s$    }5]3rI   )rJ   rK   rL   r"   r   rH   rN   rO   s   @r:   r   r   '  s    &&rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMOnlyMLMHeadc                 B    t         |           t        |      | _        y r   )r!   r"   r   predictionsro   s     r:   r"   zMarkupLMOnlyMLMHead.__init__@  s    3F;rI   sequence_outputr   c                 (    | j                  |      }|S r   )r   )r6   r   prediction_scoress      r:   rH   zMarkupLMOnlyMLMHead.forwardD  s     ,,_=  rI   r   rO   s   @r:   r   r   ?  s#    <!u|| ! !rI   r   modulequerykeyvalueattention_maskscalingr*   	head_maskc                 .   t        j                  ||j                  dd            |z  }	|#|d d d d d d d |j                  d   f   }
|	|
z   }	t        j
                  j                  |	dt         j                        j                  |j                        }	t        j
                  j                  |	|| j                        }	||	|j                  dddd      z  }	t        j                  |	|      }|j                  dd      j                         }||	fS )N   r
   r<   )r>   rr   )ptrainingr   )r@   matmul	transposeshaper   
functionalsoftmaxfloat32torr   r*   r   view
contiguous)r   r   r   r   r   r   r*   r   kwargsattn_weightscausal_maskattn_outputs               r:   eager_attention_forwardr   J  s     <<s}}Q':;gEL!$Q1o		"o%=>#k1==((2U]](SVVW\WbWbcL==((6??([L#innQAq&AA,,|U3K''1-88:K$$rI   c                        e Zd Z fdZ	 	 	 ddej
                  deej                     deej                     dee   de	ej
                     f
dZ
 xZS )	MarkupLMSelfAttentionc                 $   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      || _        |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |j                   | _        | j                  dz  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r!   r"   r&   num_attention_headshasattr
ValueErrorr7   rR   attention_head_sizeall_head_sizer   r$   r   r   r   r(   attention_probs_dropout_probr*   attention_dropoutr   ro   s     r:   r"   zMarkupLMSelfAttention.__init__g  sC    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5rI   r   r   r   output_attentionsr   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
t        }| j                  j                  dk7  rt        | j                  j                     } || ||	|
|f| j                  sdn| j                  | j                  |d|\  }} |j                  g |d j                         }|r||f}|S |f}|S )Nr<   r   r   eager        )r*   r   r   )r   r   r   r   r   r   r   r   r7   _attn_implementationr   r   r   r   reshaper   )r6   r   r   r   r   r   rw   hidden_shapequery_states
key_statesvalue_statesattention_interfacer   r   outputss                  r:   rH   zMarkupLMSelfAttention.forward|  sa    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?;;++w6"9$++:Z:Z"[$7
%
  $}}C$2H2HLL
%
 
%
!\ *k));;;;FFH1B;- JUrI   NNF)rJ   rK   rL   r"   r@   r   r   FloatTensorboolr~   rH   rN   rO   s   @r:   r   r   f  so    60 7;15,1!||! !!2!23! E--.	!
 $D>! 
u||	!rI   r   c                        e Zd Z fdZd Z	 	 	 d	dej                  deej                     deej                     dee	   de
ej                     f
dZ xZS )
MarkupLMAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y r   )r!   r"   r   r6   r   outputsetpruned_headsro   s     r:   r"   zMarkupLMAttention.__init__  s0    )&1	(0ErI   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r=   )lenr   r6   r   r   r   r   r   r   r   r   r   r   union)r6   headsindexs      r:   prune_headszMarkupLMAttention.prune_heads  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rI   r   r   r   r   r   c                 p     | j                   |f|||d|}| j                  |d   |      }|f|dd  z   }|S N)r   r   r   r   r   )r6   r   )	r6   r   r   r   r   r   self_outputsattention_outputr   s	            r:   rH   zMarkupLMAttention.forward  s_     !tyy
)/	

 
  ;;|AF#%QR(88rI   r   )rJ   rK   rL   r"   r   r@   r   r   r   r   r~   rH   rN   rO   s   @r:   r   r     st    ";* 7;15,1|| !!2!23 E--.	
 $D> 
u||	rI   r   c                        e Zd Z fdZ	 	 	 d	dej
                  deej                     deej                     dee   de	ej
                     f
dZ
d Z xZS )
MarkupLMLayerc                     t         |           |j                  | _        d| _        t	        |      | _        t        |      | _        t        |      | _	        y )Nr   )
r!   r"   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   ro   s     r:   r"   zMarkupLMLayer.__init__  sI    '-'E'E$*6208$V,rI   r   r   r   r   r   c                      | j                   |f|||d|}|d   }|dd  }t        | j                  | j                  | j                  |      }	|	f|z   }|S r  )r
  r   feed_forward_chunkr  r	  )
r6   r   r   r   r   r   self_attention_outputsr  r   layer_outputs
             r:   rH   zMarkupLMLayer.forward  s     "0"
)/	"

 "
 2!4(,0##T%A%A4CSCSUe
  /G+rI   c                 L    | j                  |      }| j                  ||      }|S r   )r  r   )r6   r  intermediate_outputr  s       r:   r  z MarkupLMLayer.feed_forward_chunk  s,    "//0@A{{#68HIrI   r   )rJ   rK   rL   r"   r@   r   r   r   r   r~   rH   r  rN   rO   s   @r:   r  r    st    - 7;15,1|| !!2!23 E--.	
 $D> 
u||	2rI   r  c                        e Zd Z fdZe	 	 	 	 	 d
dej                  deej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd	       Z xZS )MarkupLMEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r!   r"   r7   r   r/   r0   num_hidden_layersr  layergradient_checkpointing)r6   r7   rF   r9   s      r:   r"   zMarkupLMEncoder.__init__  sN    ]]5IaIaCb#caM&$9#cd
&+# $ds   A#r   r   r   r   output_hidden_statesreturn_dictr   c           	          |rdnd }|rdnd }	t        | j                        D ]4  \  }
}|r||fz   }|||
   nd } |d||||d|}|d   }|s,|	|d   fz   }	6 |r||fz   }t        |||	      S )N )r   r   r   r   r   r   )last_hidden_stater   
attentions)	enumerater  r   )r6   r   r   r   r   r  r  r   all_hidden_statesall_self_attentionsrF   layer_modulelayer_head_masklayer_outputss                 r:   rH   zMarkupLMEncoder.forward  s     #7BD$5b4(4 	POA|#$58H$H!.7.CilO( +-)"3	
 M *!,M &9]1=M<O&O#!	P$   1]4D D++*
 	
rI   )NNFFT)rJ   rK   rL   r"   r   r@   r   r   r   r   r   r~   r   rH   rN   rO   s   @r:   r  r    s    ,  7;15,1/4&*&
||&
 !!2!23&
 E--.	&

 $D>&
 'tn&
 d^&
 
uU\\"O3	4&
 &
rI   r  c                   h     e Zd ZU eed<   dZd Zedee	e
ej                  f      f fd       Z xZS )MarkupLMPreTrainedModelr7   markuplmc                 l   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t              r%|j                  j                  j                          yy)zInitialize the weightsr   )meanstdN      ?)r   r   r$   weightdatanormal_r7   initializer_ranger   zero_r1   rW   rj   fill_r   )r6   r   s     r:   _init_weightsz%MarkupLMPreTrainedModel._init_weights/  s'   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) 89KK""$ :rI   pretrained_model_name_or_pathc                 *    t        |   |g|i |S r   )r!   from_pretrained)clsr2  
model_argsr   r9   s       r:   r4  z'MarkupLMPreTrainedModel.from_pretrainedA  s    w&'D\z\U[\\rI   )rJ   rK   rL   r   __annotations__base_model_prefixr1  classmethodr   r   r   osPathLiker4  rN   rO   s   @r:   r%  r%  )  sH    "%$ ]HU3PRP[P[K[E\<] ] ]rI   r%  c                       e Zd Zd fd	Zd Zd Zd Zee	 	 	 	 	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     de	e
j                     d	e	e
j                     d
e	e
j                     de	e
j                     de	e
j                     de	e   de	e   de	e   deeef   fd              Z xZS )MarkupLMModelc                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r!   r"   r7   r]   r   r  encoderr   pooler	post_init)r6   r7   add_pooling_layerr9   s      r:   r"   zMarkupLMModel.__init__I  sM    
 	 ,V4&v.0AnV,t 	rI   c                 .    | j                   j                  S r   r   re   r   s    r:   get_input_embeddingsz"MarkupLMModel.get_input_embeddingsY  s    ...rI   c                 &    || j                   _        y r   rD  )r6   r   s     r:   set_input_embeddingsz"MarkupLMModel.set_input_embeddings\  s    */'rI   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr?  r  r
  r   )r6   heads_to_pruner  r   s       r:   _prune_headszMarkupLMModel._prune_heads_  sE    
 +002 	CLE5LLu%//;;EB	CrI   rV   rB   rC   r   r   ra   r   rv   r   r  r  r   c                    |	|	n| j                   j                  }	|
|
n| j                   j                  }
||n| j                   j                  }||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      ||j                  n|j                  }|t        j                  ||      }|&t        j                  |t        j                  |      }|j                  d      j                  d      }|j                  | j                  	      }d
|z
  dz  }||j                         dk(  rh|j                  d      j                  d      j                  d      j                  d      }|j!                  | j                   j"                  dddd      }nB|j                         dk(  r/|j                  d      j                  d      j                  d      }|j                  t%        | j'                               j                  	      }ndg| j                   j"                  z  }| j)                  ||||||      }| j+                  ||||	|
d      }|d   }| j,                  | j-                  |      nd}t/        |||j0                  |j2                        S )a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMModel

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

        >>> encoding = processor(html_string, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 4, 768]
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embeds)rs   rq   r   r   )rr   r*  g     r   )rV   rB   rC   ra   r   rv   T)r   r   r  r  )r  pooler_outputr   r  )r7   r   r  use_return_dictr   %warn_if_padding_and_no_attention_maskrt   rs   r@   r}   r{   rU   ru   r   rr   r>   rn   r  next
parametersr   r?  r@  r   r   r  )r6   rV   rB   rC   r   r   ra   r   rv   r   r  r  rw   rs   extended_attention_maskembedding_outputencoder_outputsr   r   s                      r:   rH   zMarkupLMModel.forwardg  s   N 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@H"L }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??))%)' + 
 ,,#/!5 ' 
 *!,8<8OO4UY)-')77&11	
 	
rI   )T)NNNNNNNNNNN)rJ   rK   rL   r"   rE  rG  rK  r   r   r   r@   
LongTensorr   r   r   r~   r   rH   rN   rO   s   @r:   r=  r=  F  sJ    /0C  1559596:59371559,0/3&*c
E,,-c
 !!1!12c
 !!1!12	c

 !!2!23c
 !!1!12c
 u//0c
 E--.c
   1 12c
 $D>c
 'tnc
 d^c
 
u00	1c
  c
rI   r=  c            !           e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     dee	   dee	   dee	   de
eej                     ef   fd              Z xZS )MarkupLMForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y NF)rB  )
r!   r"   
num_labelsr=  r&  r   r$   r&   
qa_outputsrA  ro   s     r:   r"   z%MarkupLMForQuestionAnswering.__init__  sU      ++%fF))F$6$68I8IJ 	rI   rV   rB   rC   r   r   ra   r   rv   start_positionsend_positionsr   r  r  r   c                     ||n| j                   j                  }| j                  ||||||||||d      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}|	|
t        |	j                               dkD  r|	j                  d      }	t        |
j                               dkD  r|
j                  d      }
|j                  d      }|	j                  d|       |
j                  d|       t        |      } |||	      } |||
      }||z   d	z  }t        ||||j                  |j                  
      S )ae  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
        >>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

        >>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
        >>> question = "What's his name?"

        >>> encoding = processor(html_string, questions=question, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> processor.decode(predict_answer_tokens).strip()
        'Niels'
        ```NT
rB   rC   r   r   ra   r   rv   r   r  r  r   r   r<   r=   )ignore_indexr   )lossstart_logits
end_logitsr   r  )r7   rN  r&  r[  splitsqueezer   r   rt   clamp_r   r   r   r  )r6   rV   rB   rC   r   r   ra   r   rv   r\  r]  r   r  r  r   r   logitsrb  rc  
total_lossignored_indexloss_fct
start_lossend_losss                           r:   rH   z$MarkupLMForQuestionAnswering.forward  s   ` &1%<k$++B]B]--))))%'/!5   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J+%!!//))
 	
rI   )NNNNNNNNNNNNN)rJ   rK   rL   r"   r   r   r   r@   r   r   r   r~   r   rH   rN   rO   s   @r:   rW  rW    sa     -115151515/3,0042604,0/3&*\
ELL)\
 !.\
 !.	\

 !.\
 !.\
 u||,\
 ELL)\
  -\
 "%,,/\
  -\
 $D>\
 'tn\
 d^\
 
uU\\"$@@	A\
  \
rI   rW  zC
    MarkupLM Model with a `token_classification` head on top.
    )custom_introc                       e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     dee	   dee	   dee	   de
eej                     ef   fd              Z xZS )MarkupLMForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y rY  )r!   r"   rZ  r=  r&  classifier_dropoutr)   r   r(   r*   r$   r&   
classifierrA  r6   r7   rq  r9   s      r:   r"   z'MarkupLMForTokenClassification.__init__D  s      ++%fF)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rI   rV   rB   rC   r   r   ra   r   rv   labelsr   r  r  r   c                 t   ||n| j                   j                  }| j                  |||||||||
|d      }|d   }| j                  |      }d}|	Ft	               } ||j                  d| j                   j                        |	j                  d            }t        |||j                  |j                        S )a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForTokenClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> processor.parse_html = False
        >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> nodes = ["hello", "world"]
        >>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
        >>> node_labels = [1, 2]
        >>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTr_  r   r<   ra  rg  r   r  )
r7   rN  r&  rr  r   r   rZ  r   r   r  )r6   rV   rB   rC   r   r   ra   r   rv   rt  r   r  r  r   r   r   ra  rj  s                     r:   rH   z&MarkupLMForTokenClassification.forwardR  s    \ &1%<k$++B]B]--))))%'/!5   
 "!* OOO<')H!&&r4;;+A+ABBD
 %$!//))	
 	
rI   NNNNNNNNNNNN)rJ   rK   rL   r"   r   r   r   r@   r   r   r   r~   r   rH   rN   rO   s   @r:   ro  ro  =  sI     -115151515/3,004)-,0/3&*L
ELL)L
 !.L
 !.	L

 !.L
 !.L
 u||,L
 ELL)L
  -L
 &L
 $D>L
 'tnL
 d^L
 
uU\\"N2	3L
  L
rI   ro  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                       e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     deej                     deej                     deej                     d	eej                     d
eej                     dee	   dee	   dee	   de
eej                     ef   fd              Z xZS )!MarkupLMForSequenceClassificationc                 n   t         |   |       |j                  | _        || _        t	        |      | _        |j                  |j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y r   )r!   r"   rZ  r7   r=  r&  rq  r)   r   r(   r*   r$   r&   rr  rA  rs  s      r:   r"   z*MarkupLMForSequenceClassification.__init__  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rI   rV   rB   rC   r   r   ra   r   rv   rt  r   r  r  r   c                    ||n| j                   j                  }| j                  |||||||||
|d      }|d   }| j                  |      }| j	                  |      }d}|	| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|	j                  t        j                  k(  s|	j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |	j                               }n |||	      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |	j                  d            }n,| j                   j
                  dk(  rt               } |||	      }t!        |||j"                  |j$                  	      S )
a  
        xpath_tags_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.
        xpath_subs_seq (`torch.LongTensor` of shape `(batch_size, sequence_length, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForSequenceClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
        >>> encoding = processor(html_string, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```NTr_  r   
regressionsingle_label_classificationmulti_label_classificationr<   rv  )r7   rN  r&  r*   rr  problem_typerZ  rr   r@   rU   rR   r	   re  r   r   r   r   r   r  )r6   rV   rB   rC   r   r   ra   r   rv   rt  r   r  r  r   r   rg  ra  rj  s                     r:   rH   z)MarkupLMForSequenceClassification.forward  s   Z &1%<k$++B]B]--))))%'/!5   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rI   rw  )rJ   rK   rL   r"   r   r   r   r@   r   r   r   r~   r   rH   rN   rO   s   @r:   ry  ry    sJ     -115151515/3,004)-,0/3&*\
ELL)\
 !.\
 !.	\

 !.\
 !.\
 u||,\
 ELL)\
  -\
 &\
 $D>\
 'tn\
 d^\
 
uU\\"$<<	=\
  \
rI   ry  )rW  ry  ro  r=  r%  )r   )r   N)?rM   r:  typingr   r   r   r@   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   r   r   r   utilsr   r   r   configuration_markuplmr   
get_loggerrJ   loggerModuler   r[   r]   r   r   r   r   r   r   r   r   floatr   r   r   r  r  r%  r=  rW  ro  ry  __all__r  rI   r:   <module>r     si    	 , ,    A A ! 9   ? > 2 
		H	%/ bii / f4 _ _F 299  RYY RYY  bii $ryy 0!")) !$ (,%II%<<% 
% <<	%
 U\\*% % % %%87BII 7v*		 *\%. %R.
bii .
b ]o ] ]8 E
+ E
 E
P j
#: j
 j
Z 
^
%< ^

^
B o
(? o
o
drI   