
import math
from collections import OrderedDict

import torch
from torch import Tensor, nn

from .utils import logging


logger = logging.get_logger(__name__)


class PytorchGELUTanh(nn.Module):
    """
    A fast C implementation of the tanh approximation of the GeLU activation function. See
    https://huggingface.co/papers/1606.08415.

    This implementation is equivalent to NewGELU and FastGELU but much faster. However, it is not an exact numerical
    match due to rounding errors.
    """

    def forward(self, input: Tensor) -> Tensor:
        return nn.functional.gelu(input, approximate="tanh")


class NewGELUActivation(nn.Module):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://huggingface.co/papers/1606.08415
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * torch.pow(input, 3.0))))


class GELUActivation(nn.Module):
    """
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) This is now written in C in nn.functional
    Also see the Gaussian Error Linear Units paper: https://huggingface.co/papers/1606.08415
    """

    def __init__(self, use_gelu_python: bool = False):
        super().__init__()
        if use_gelu_python:
            self.act = self._gelu_python
        else:
            self.act = nn.functional.gelu

    def _gelu_python(self, input: Tensor) -> Tensor:
        return input * 0.5 * (1.0 + torch.erf(input / math.sqrt(2.0)))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class FastGELUActivation(nn.Module):
    """
    Applies GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(input * 0.7978845608 * (1.0 + 0.044715 * input * input)))


class QuickGELUActivation(nn.Module):
    """
    Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return input * torch.sigmoid(1.702 * input)


class ClippedGELUActivation(nn.Module):
    """
    Clip the range of possible GeLU outputs between [min, max]. This is especially useful for quantization purpose, as
    it allows mapping negatives values in the GeLU spectrum. For more information on this trick, please refer to
    https://huggingface.co/papers/2004.09602.

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). See https://huggingface.co/papers/1606.08415
    """

    def __init__(self, min: float, max: float):
        if min > max:
            raise ValueError(f"min should be < max (got min: {min}, max: {max})")

        super().__init__()
        self.min = min
        self.max = max

    def forward(self, x: Tensor) -> Tensor:
        return torch.clip(gelu(x), self.min, self.max)


class AccurateGELUActivation(nn.Module):
    """
    Applies GELU approximation that is faster than default and more accurate than QuickGELU. See:
    https://github.com/hendrycks/GELUs

    Implemented along with MEGA (Moving Average Equipped Gated Attention)
    """

    def __init__(self):
        super().__init__()
        self.precomputed_constant = math.sqrt(2 / math.pi)

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3))))


class MishActivation(nn.Module):
    """
    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://huggingface.co/papers/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    """

    def __init__(self):
        super().__init__()
        self.act = nn.functional.mish

    def _mish_python(self, input: Tensor) -> Tensor:
        return input * torch.tanh(nn.functional.softplus(input))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class LinearActivation(nn.Module):
    """
    Applies the linear activation function, i.e. forwarding input directly to output.
    """

    def forward(self, input: Tensor) -> Tensor:
        return input


class LaplaceActivation(nn.Module):
    """
    Applies elementwise activation based on Laplace function, introduced in MEGA as an attention activation. See
    https://huggingface.co/papers/2209.10655

    Inspired by squared relu, but with bounded range and gradient for better stability
    """

    def forward(self, input, mu=0.707107, sigma=0.282095):
        input = (input - mu).div(sigma * math.sqrt(2.0))
        return 0.5 * (1.0 + torch.erf(input))


class ReLUSquaredActivation(nn.Module):
    """
    Applies the relu^2 activation introduced in https://huggingface.co/papers/2109.08668v2
    """

    def forward(self, input):
        relu_applied = nn.functional.relu(input)
        squared = torch.square(relu_applied)
        return squared


class ClassInstantier(OrderedDict):
    def __getitem__(self, key):
        content = super().__getitem__(key)
        cls, kwargs = content if isinstance(content, tuple) else (content, {})
        return cls(**kwargs)


ACT2CLS = {
    "gelu": GELUActivation,
    "gelu_10": (ClippedGELUActivation, {"min": -10, "max": 10}),
    "gelu_fast": FastGELUActivation,
    "gelu_new": NewGELUActivation,
    "gelu_python": (GELUActivation, {"use_gelu_python": True}),
    "gelu_pytorch_tanh": PytorchGELUTanh,
    "gelu_accurate": AccurateGELUActivation,
    "laplace": LaplaceActivation,
    "leaky_relu": nn.LeakyReLU,
    "linear": LinearActivation,
    "mish": MishActivation,
    "quick_gelu": QuickGELUActivation,
    "relu": nn.ReLU,
    "relu2": ReLUSquaredActivation,
    "relu6": nn.ReLU6,
    "sigmoid": nn.Sigmoid,
    "silu": nn.SiLU,
    "swish": nn.SiLU,
    "tanh": nn.Tanh,
    "prelu": nn.PReLU,
}
ACT2FN = ClassInstantier(ACT2CLS)


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}")


gelu_python = get_activation("gelu_python")
gelu_new = get_activation("gelu_new")
gelu = get_activation("gelu")
gelu_fast = get_activation("gelu_fast")
quick_gelu = get_activation("quick_gelu")
silu = get_activation("silu")
mish = get_activation("mish")
linear_act = get_activation("linear")
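

# Usage sketch (illustrative, not part of the upstream module): ACT2FN looks like a
# plain dict, but ClassInstantier.__getitem__ constructs a fresh nn.Module on every
# lookup, applying any kwargs stored next to the class in ACT2CLS (e.g. "gelu_10"
# yields ClippedGELUActivation(min=-10, max=10)). Guarded so importing this module
# stays side-effect free; the tensor shape below is an arbitrary example value.
if __name__ == "__main__":
    example_act = ACT2FN["gelu_new"]             # fresh NewGELUActivation instance
    example_clipped = get_activation("gelu_10")  # ClippedGELUActivation(min=-10, max=10)
    example_input = torch.randn(2, 3)
    print(example_act(example_input).shape)      # elementwise, so same shape as the input
    print(example_clipped(example_input).max())  # clipped GELU output, never above 10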