
    rh,                     >   d dl mZ d dlmZ d dlmZ d dlZd dlmZ ddl	m
Z
 ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZmZmZ  ej2                  e      Z G d dej8                        Ze G d de             Ze G d de             Ze G d de             Z y)    )ABC)partial)OptionalN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   &     e Zd ZdZdZ fdZ xZS )GradientCheckpointingLayera  Base class for layers with gradient checkpointing.

    This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
    (`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
    enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

    Important:

        When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
        must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

        Example:

            ```python
            >>> # Correct - hidden_states passed as positional arg
            >>> out = self.layer(hidden_states, attention_mask=attention_mask)

            >>> # Incorrect - hidden_states passed as keyword arg
            >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
            ```
    Fc                    | j                   r| j                  rd}| j                  j                  }d| d}d|v r|d   rd|d<   |dz  }d}d|v r|d   d |d<   |dz  }d}d	|v r|d	   d |d	<   |d
z  }d}d|v r|d   d |d<   |dz  }d}|r)|j	                  d      dz   }t
        j                  |        | j                  t        t        | (  fi |g| S t        | (  |i |S )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         o/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/modeling_layers.pyr$   z#GradientCheckpointingLayer.__call__=   sK   &&4==G00JOPZ|[deGf$)<&+{#00  6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00 !..-3w'4444WUW=M5XQW5X`[_``w000    )r   
__module____qualname____doc__r   r$   __classcell__r   s   @r+   r   r   $   s    , #!1 !1r,   r   c                       e Zd ZdZ fdZee	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee   deej                     deej                     d	ee   d
ee   defd              Z xZS ) GenericForSequenceClassificationmodelc                    t         |   |       |j                  | _        t        | | j                  t        j                  |             t        j                  |j                  | j                  d      | _
        | j                          y )NF)bias)r#   __init__
num_labelssetattrbase_model_prefixr   from_confignnLinearhidden_sizescore	post_initr%   configr   s     r+   r7   z)GenericForSequenceClassification.__init__e   sd      ++d,,i.C.CF.KLYYv114??O
 	r,   	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r'   returnc           	          t        | | j                        |f|||||d|}	|	j                  }
| j                  |
      }||j                  d   }n|j                  d   }| j
                  j                  |dk7  rt        d      | j
                  j                  d}n||| j
                  j                  k7  j                  |j                  t        j                        }t        j                  |j                  d   |j                  t        j                        }||z  j                  d      }n.d}t        j                  | j                   j"                   d       |t        j                  ||j                        |f   }d }|| j%                  |||| j
                  	      }t'        |||	j(                  |	j*                  |	j,                  
      S )NrD   rE   r   rF   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rL   )logitsrG   pooled_logitsrB   )lossrN   r   hidden_states
attentions)getattrr:   last_hidden_stater?   shaperB   pad_token_id
ValueErrortorL   torchint32arangeargmaxr    warning_oncer   r   loss_functionr
   r   rQ   rR   )r%   rC   rD   rE   r   rF   rG   r   r'   transformer_outputsrQ   rN   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrO   rP   s                     r+   forwardz(GenericForSequenceClassification.forwardo   s    8]wtTE[E[7\8
)%+'8
 8
 ,==M* "+J&,,Q/J;;##+
a\]];;##+!#"%)A)AAEEfmmUZU`U`aL!LL)<V]]Z_ZeZefM"/,">!F!Fr!J!#>>**+ ,Z Z
 u||Jv}}MOaab%%VFR_hlhshs%tD/ /??-;;*55
 	
r,   NNNNNNN)r   r-   r.   r:   r7   r   r   r   rY   
LongTensorTensorr   FloatTensorboolr   r   r
   rd   r0   r1   s   @r+   r3   r3   a   s      151537+/59-1$(8
E,,-8
 !.8
 u//0	8

 "%8
   1 128
 ))*8
 D>8
 +,8
 
*8
  8
r,   r3   c                   &    e Zd ZdZ fdZd Zd Zee	 	 	 	 	 	 	 dde	e
j                     de	e
j                     de	e
j                     de	e   d	e	e
j                     d
e	e
j                     de	e
j                     dee   defd              Z xZS )GenericForQuestionAnsweringr4   c                     t         |   |       t        | | j                  t	        j
                  |             t        j                  |j                  d      | _	        | j                          y )N   )r#   r7   r9   r:   r   r;   r<   r=   r>   
qa_outputsr@   rA   s     r+   r7   z$GenericForQuestionAnswering.__init__   sQ     d,,i.C.CF.KL))F$6$6: 	r,   c                 B    t        | | j                        j                  S NrS   r:   embed_tokens)r%   s    r+   get_input_embeddingsz0GenericForQuestionAnswering.get_input_embeddings   s    tT334AAAr,   c                 :    |t        | | j                        _        y rp   rq   )r%   values     r+   set_input_embeddingsz0GenericForQuestionAnswering.set_input_embeddings   s    =Bd,,-:r,   rC   rD   rE   r   rF   start_positionsend_positionsr'   rH   c                     t        | | j                        |f||||d|}	|	j                  }
| j                  |
      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }|| | j                  ||||fi |}t        ||||	j                  |	j                        S )N)rD   rE   r   rF   r   rK   )dim)rP   start_logits
end_logitsrQ   rR   )rS   r:   rT   rn   splitsqueeze
contiguousr^   r	   rQ   rR   )r%   rC   rD   rE   r   rF   rw   rx   r'   outputssequence_outputrN   r{   r|   rP   s                  r+   rd   z#GenericForQuestionAnswering.forward   s     ,Q749O9O+P,
)%+',
 ,
 "331#)<<r<#: j#++B/::<''+668
&=+D%4%%lJQ^ibhiD+%!!//))
 	
r,   re   )r   r-   r.   r:   r7   rs   rv   r   r   r   rY   rf   rg   r   rh   r   r   r	   rd   r0   r1   s   @r+   rk   rk      s    BC  151537+/596:48%
E,,-%
 !.%
 u//0	%

 "%%
   1 12%
 "%"2"23%
   0 01%
 +,%
 
&%
  %
r,   rk   c                        e Zd ZdZ fdZee	 	 	 	 	 	 	 ddeej                     deej                     deej                     dee   deej                     deej                     d	ee   d
efd              Z xZS )GenericForTokenClassificationr4   c                    t         |   |       |j                  | _        t        | | j                  t        j                  |             t        |dd       |j                  }nt        |dd       |j                  }nd}t        j                  |      | _        t        j                  |j                  |j                        | _        | j!                          y )Nclassifier_dropouthidden_dropoutg?)r#   r7   r8   r9   r:   r   r;   rS   r   r   r<   Dropoutdropoutr=   r>   r?   r@   )r%   rB   r   r   s      r+   r7   z&GenericForTokenClassification.__init__   s      ++d,,i.C.CF.KL6/6B!'!:!:V-t4@!'!6!6!$zz"45YYv1163D3DE
 	r,   rC   rD   rE   r   rF   rG   r   rH   c           	      ,    t        | | j                        |f|||||d|}	|	j                  }
| j                  |
      }
| j	                  |
      }d }|| j                  ||| j                        }t        |||	j                  |	j                        S )NrJ   )rP   rN   rQ   rR   )
rS   r:   rT   r   r?   r^   rB   r   rQ   rR   )r%   rC   rD   rE   r   rF   rG   r   r'   r   r   rN   rP   s                r+   rd   z%GenericForTokenClassification.forward   s     ,Q749O9O+P,
)%+',
 ,
 "33,,7O,%%ffdkkBD$!//))	
 	
r,   re   )r   r-   r.   r:   r7   r   r   r   rY   rf   rg   r   rh   ri   r   rd   r0   r1   s   @r+   r   r      s    "  151537+/59-1$(!
E,,-!
 !.!
 u//0	!

 "%!
   1 12!
 ))*!
 D>!
 
!
  !
r,   r   )!abcr   	functoolsr   typingr   rY   torch.nnr<   cache_utilsr   modeling_outputsr   r	   r
   r   models.autor   processing_utilsr   utilsr   r   r   r   
get_loggerr   r    Moduler   r3   rk   r    r,   r+   <module>r      s           # $ P P 
		H	%:1 :1z G
s G
 G
T 9
# 9
 9
x 7
C 7
 7
r,   