
    rhv                        d dl Z d dlmZ d dlmZ ddlmZ ddlmZm	Z	  e	j                  e      Z e       rd dlZd Z	 	 	 d"dee   d	ed
   dee   dedef   fdZ	 	 	 d"dee   d	ed
   dee   dedef   fdZ	 	 	 d"dee   d	ed
   dee   dedef   fdZ	 d#ded	d
dee   dedef   fdZ	 d#ded	d
dee   dedef   fdZ	 d#ded	d
dee   dedef   fdZeeeeeedZ	 	 d$dedededee   dee   f
dZd#dedee   fdZd#dedee   fdZd#dedee   fdZd#dedee   fdZd#dedee   fdZ d#dedee   fd Z!eeeee e!dZ"d#dedee   fd!Z#y)%    Nwraps)Optional   )PretrainedConfig)is_torch_availableloggingc                 B     d d t                fd       }|S )ad  
    Decorator function to update the RoPE parameters in the forward pass, if the model is using a dynamic RoPE
    (i.e. a RoPE implementation that may recompute its frequencies in the forward pass).

    Args:
        rope_forward (Callable):
            The forward pass of the RoPE implementation.

    Returns:
        The decorated forward pass.
    c                    t        j                  |      dz   }t        | j                  d      r| j                  j                  }n| j                  j
                  }||kD  rTt        | d      s)| j                  | j                  ||dz         \  | _        }| j                  d| j                  d       y| j                  j                  |      | _	        | j                  d| j                  d       y)	zbLongrope uses long factor if sequence is larger than original pretraining length, short otherwise.r    original_max_position_embeddingslong_inv_freqseq_leninv_freqF
persistentN)torchmaxhasattrconfigr   max_position_embeddingsrope_init_fnr   register_bufferoriginal_inv_freqto)selfposition_idsdevicer   r   _s         s/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/modeling_rope_utils.pylongrope_frequency_updatez6dynamic_rope_update.<locals>.longrope_frequency_update+   s    ))L)A-4;; BC/3{{/[/[,/3{{/R/R,5541(,(9(9KK1QTU1U ): )%"A   T-?-?E R &*%;%;%>%>v%FD"  T-C-CPU V    c                    t        j                  |      dz   }|| j                  kD  rA| j                  | j                  ||      \  }| _        | j                  d|d       || _        || j                  k  rj| j                  | j                  kD  rP| j                  j                  |      | _        | j                  d| j                  d       | j                  | _        yyy)a  
        dynamic RoPE layers should recompute `inv_freq` in the following situations:
        1 - growing beyond the cached sequence length (allow scaling)
        2 - the current sequence length is in the original scale (avoid losing precision with small sequences)
        r   r   r   Fr   N)
r   r   max_seq_len_cachedr   r   attention_scalingr   original_max_seq_lenr   r   )r   r   r   r   r   s        r    dynamic_frequency_updatez5dynamic_rope_update.<locals>.dynamic_frequency_update>   s     ))L)A-T,,,/3/@/@f^e/@/f,Hd,  X% H&-D#T...43J3JTMfMf3f &*%;%;%>%>v%FD"  T-C-CPU V&*&?&?D# 4g.r"   c                     d| j                   v r | ||j                         n$| j                   dk(  r | ||j                          | ||      S )Ndynamic)r   longrope)	rope_typer   )r   xr   r'   r!   rope_forwards      r    wrapperz$dynamic_rope_update.<locals>.wrapperQ   sJ    &$T<I^^z)%dLJD!\22r"   r   )r-   r.   r'   r!   s   ` @@r    dynamic_rope_updater/      s/    W&@& <3 3 Nr"   r   r   ztorch.devicer   returnztorch.Tensorc                 d   | j                   }t        | d      r| j                  nd}t        | dd      xs | j                  | j
                  z  }t        ||z        }d}d|t        j                  d|dt        j                        j                  |t        j                        |z  z  z  }||fS )	ax  
    Computes the inverse frequencies according to the original RoPE implementation
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    partial_rotary_factor      ?head_dimNr      dtyper   r7   )
rope_thetar   r2   getattrhidden_sizenum_attention_headsintr   arangeint64r   float)	r   r   r   baser2   r4   dimattention_factorr   s	            r     _compute_default_rope_parametersrD   \   s    $ D<CFLc<dF88jmvz40dF4F4F&JdJd4dH
h..
/C du||AsAU[[ILLTZbgbmbmLnqttuvH%%%r"   c                 R    | j                   d   }t        | ||      \  }}||z  }||fS )a  
    Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    factor)rope_scalingrD   )r   r   r   rF   r   rC   s         r    '_compute_linear_scaling_rope_parametersrH   z   sD    $   *F "B&&RY!ZH
 H%%%r"   c                    | j                   }t        | d      r| j                  nd}t        | d| j                  | j
                  z        }t        ||z        }| j                  }| j                  d   }d}	||}ngt        |t        j                        rAt        j                  |t        j                  ||j                  |j                              }nt!        ||      }|||z  |z  |dz
  z
  ||dz
  z  z  z  }d|t        j"                  d|dt        j$                  	      j'                  |t        j(                  
      |z  z  z  }
|
|	fS )a  
    Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length, used to update the dynamic RoPE at inference time.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    r2   r3   r4   rF   r7   r   r   r5   r   r6   r8   )r9   r   r2   r:   r;   r<   r=   r   rG   
isinstancer   Tensormaximumtensorr7   r   r   r>   r?   r   r@   )r   r   r   rA   r2   r4   rB   r   rF   rC   r   s              r    _compute_dynamic_ntk_parametersrO      sJ   & D<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C$<<  *F )	GU\\	*--LL0gnn]

 g67 FW$'>>6A:NTW[^ab[bTcddDdu||AsAU[[ILLTZbgbmbmLnqttuvH%%%r"   c                 B   | j                   }t        | d      r| j                  nd}t        | d| j                  | j
                  z        }t        ||z        }| j                  d   }| j                  j                  d      }| j                  j                  d      }	| j                  j                  d      }
d| j                  v r| j                  d   }| j                  |z  }n| j                  }dd
}|)|	r|
rt         |||	       |||
      z        }n ||      }| j                  j                  d      xs d}| j                  j                  d      xs d	}d fd}d }|t        j                  d|d      j                  |t        j                        |z  z  }d|z  }d||z  z  }| j                  j                  dd      } |||||||      \  }}d	 ||||dz        j                  |t        j                        z
  }|d	|z
  z  ||z  z   }||fS )a  
    Computes the inverse frequencies with NTK scaling. Please refer to the
    [original paper](https://huggingface.co/papers/2309.00071)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r2   r3   r4   rF   rC   mscalemscale_all_dimr   r   c                 J    | dk  ryd|z  t        j                  |       z  dz   S )Nr   r3   g?)mathlog)scalerQ   s     r    
get_mscalez,_compute_yarn_parameters.<locals>.get_mscale   s(    A:V|dhhuo-33r"   	beta_fast    	beta_slowc                     |t        j                  || dz  t         j                  z  z        z  dt        j                  |      z  z  S )zPInverse dimension formula to find the dimension based on the number of rotationsr5   )rT   rU   pi)num_rotationsrB   rA   r   s       r    find_correction_dimz5_compute_yarn_parameters.<locals>.find_correction_dim   sB    dhh6-!:Kdgg:UVWW\]`d`h`him`n\noor"   c                      | |||      } ||||      }|r,t        j                  |      x}}t        j                  |      }t        |d      t	        ||dz
        fS )z.Find dimension range bounds based on rotationsr   r   )rT   floorceilr   min)	low_rothigh_rotrB   rA   r   truncatelowhighr^   s	           r    find_correction_rangez7_compute_yarn_parameters.<locals>.find_correction_range   sc    !'36MN"8S$8OP

3'C#99T?D3{CcAg...r"   c                     | |k(  r|dz  }t        j                  |t         j                        | z
  || z
  z  }t        j                  |dd      }|S )NgMbP?r6   r   r   )r   r>   float32clamp)rb   r   rB   linear_func	ramp_funcs        r    linear_ramp_factorz4_compute_yarn_parameters.<locals>.linear_ramp_factor  sL    #:5LC||Cu}}=Cc	RKKQ2	r"   r   r5   r8   re   T)r   )r9   r   r2   r:   r;   r<   r=   rG   getr   r@   r   r>   r   )r   r   r   rA   r2   r4   rB   rF   rC   rQ   rR   r   rW   rX   rZ   rh   rn   	pos_freqsinv_freq_extrapolationinv_freq_interpolationre   rf   rg   inv_freq_extrapolation_factorr   r^   s                            @r    _compute_yarn_parametersrt      sR   $ D<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C  *F**../AB  $$X.F((,,-=>N
 *V-@-@@+1+>+>?a+b(//2RR+1+I+I(4 n$Z%?*VUcBd%de)&1 ##''4:I##''49Ip/ aa03363UX[[\I 9_ FY$67""&&z48H%iCGgiqrIC %&(:3cQh(O(R(RZ`hmhshs(R(t$t!!&C"CD
 #@
@	A  %%%r"   c                    | j                   }t        | d      r| j                  nd}t        | d| j                  | j
                  z        }t        ||z        }| j                  d   }| j                  d   }| j                  j                  d      }	| j                  j                  d      }
t        | d      r&| j                  }| j                  | j                  z  }	n| j                  }|
I|	dk  rd}
nAt        j                  d	t        j                  |	      t        j                  |      z  z         }
|r,||kD  r't        j                  |t        j                   |
      }n&t        j                  |t        j                   |
      }t        j"                  d|dt        j$                  |
      j'                         |z  }d|||z  z  z  }||
fS )a~  
    Computes the inverse frequencies with LongRoPE scaling. Please refer to the
    [original implementation](https://github.com/microsoft/LongRoPE)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r2   r3   r4   long_factorshort_factorrF   rC   r   r   rJ   r   r5   )r9   r   r2   r:   r;   r<   r=   rG   ro   r   r   rT   sqrtrU   r   rN   rj   r>   r?   r@   )r   r   r   rA   r2   r4   rB   rv   rw   rF   rC   r   ext_factorsinv_freq_shaper   s                  r    _compute_longrope_parametersr{   "  s   $ D<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C%%m4K&&~6L  $$X.F**../AB
 v9:+1+R+R(//&2Y2YY+1+I+I( S="#yyTXXf-=Ii@j-j)jk 7==ll;emmFSll<u}}VT\\!S!5;;vNTTVY\\NkD.$889H%%%r"   c                    t        | ||      \  }}| j                  d   }| j                  d   }| j                  d   }| j                  d   }||z  }	||z  }
dt        j                  z  |z  }t	        j
                  ||	kD  ||z  |      }||z  |z
  ||z
  z  }d|z
  |z  |z  ||z  z   }||
k   ||	kD   z  }t	        j
                  |||      }||fS )a<  
    Computes the inverse frequencies for llama 3.1.

    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    rF   low_freq_factorhigh_freq_factorr   r5   r   )rD   rG   rT   r\   r   where)r   r   r   r   rC   rF   r}   r~   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqs                   r    _compute_llama3_parametersr   X  s   $ "B&&RY!ZH  *F))*;<O**+=>))*LMO&8'*::$''kH$G [[+;!;X=NPXYN$w.@EUXgEghM]*n<vEXfHff!223BR8R6SSN[[1BNSN+++r"   )defaultlinearr)   yarnr*   llama3r+   received_keysrequired_keysoptional_keysignore_keysc                     d|v r|dhz  }|j                  d       |||z  }||z
  }|rt        d|  d|       |	||z
  |z
  }n||z
  }|rt        j                  d|  d|        yy)zYCompare the received keys in `config.rope_scaling` against the expected and optional keystyper+   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)r+   r   r   r   r   missing_keysunused_keyss          r    _check_received_keysr     s     &!+& $ =0LRS\R]]`am`nopp #m3mC#m3NykY\]h\ijk r"   c                     | j                   }|j                  d|j                  dd             }dh}t        |j                               }t	        ||||       y )Nr+   r   r   )rG   ro   setkeysr   )r   r   rG   r+   r   r   s         r    !_validate_default_rope_parametersr     sT    &&L  l.>.>vt.LMI MM))+,MM=kZr"   c                 "   | j                   }|j                  d|j                  dd             }ddh}t        |j                               }t	        ||||       |d   }|t        |t              r|dk  rt        j                  d|        y y )Nr+   r   rF   r   r3   8`rope_scaling`'s factor field must be a float >= 1, got 	rG   ro   r   r   r   rK   r@   r   r   )r   r   rG   r+   r   r   rF   s          r    (_validate_linear_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M))+,MM=kZ(#F~Z6&3,QRXQYZ[ ;Gr"   c                 *   | j                   }|j                  d|j                  dd             }ddh}dh}t        |j                               }t	        |||||       |d   }|t        |t              r|dk  rt        j                  d|        y y )Nr+   r   rF   r   r   r3   r   r   )r   r   rG   r+   r   r   r   rF   s           r    )_validate_dynamic_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M78M))+,MM=-]hi(#F~Z6&3,QRXQYZ[ ;Gr"   c                    | j                   }|j                  d|j                  dd             }ddh}h d}t        |j                               }t	        |||||       |d   }|t        |t              r|dk  rt        j                  d|        |j                  d      }|-t        |t              r|d	k  rt        j                  d
|        |j                  d      }	|	(t        |	t              st        j                  d|	        |j                  d      }
|
(t        |
t              st        j                  d|
        |	xs d|
xs dk  rt        j                  d|	 d|
 d       y y )Nr+   r   rF   >   rQ   re   rX   rZ   rR   rC   r   r   r3   r   rC   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got rX   z6`rope_scaling`'s beta_fast field must be a float, got rZ   z6`rope_scaling`'s beta_slow field must be a float, got rY   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)r   )r   r   rG   r+   r   r   r   rF   rC   rX   rZ   s              r    _validate_yarn_parametersr     s   &&L  l.>.>vt.LMI (+MM ))+,MM=-]hi(#F~Z6&3,QRXQYZ[#''(:;#Z8H%-PTdghThZ[kZlm	
   -IZ	5%AOPY{[\  -IZ	5%AOPY{[\RIN+]^g]h i66?[@XZ	
 ,r"   c                    | j                   }|j                  d|j                  dd             }h d}h d}t        |j                               }t	        |||||       t        | d      r| j                  nd}t        | d| j                  | j                  z        }t        ||z        }	|j                  d	      }
t        |
t              s*t        d
 |
D              rt        j                  d|
        t!        |
      |	dz  k7  r't        j                  d|	dz   dt!        |
              |j                  d      }t        |t              s*t        d |D              rt        j                  d|        t!        |      |	dz  k7  r't        j                  d|	dz   dt!        |              t        | d      rt        j#                  d       y |j                  d      }|t        j                  d       n-t        |t$              r|dk  rt        j                  d|        |j                  d      }|/t        |t$              r|dk  rt        j                  d|        y y y )Nr+   r   >   r+   rv   rw   >   rF   rC   r   r   r2   r3   r4   rw   c              3   H   K   | ]  }t        |t        t        f        y wNrK   r=   r@   .0r,   s     r    	<genexpr>z0_validate_longrope_parameters.<locals>.<genexpr>  s     1dRS*Qe2M1d    "zC`rope_scaling`'s short_factor field must be a list of numbers, got r5   z5`rope_scaling`'s short_factor field must have length z, got rv   c              3   H   K   | ]  }t        |t        t        f        y wr   r   r   s     r    r   z0_validate_longrope_parameters.<locals>.<genexpr>  s     0bQRAU|1L0br   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length r   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.rF   z1Missing required keys in `rope_scaling`: 'factor'r   rC   g        r   )rG   ro   r   r   r   r   r2   r:   r;   r<   r=   rK   listallr   r   lenwarning_oncer@   )r   r   rG   r+   r   r   r   r2   r4   rB   rw   rv   rF   rC   s                 r    _validate_longrope_parametersr     sI   &&L  l.>.>vt.LMI@MVM))+,MM=-]hi<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C##N3LlD)c1dWc1d.d\]i\jkl
<C1H$NsVWxjX^_bco_p^qrs""=1Kk4(S0bVa0b-b[\g[hij
;3!8#McUVhZW]^abm^n]opq
 v9:A	
 !!(+>NNNOFE*fslNNUV\U]^_'++,>?'.6:JS:Pbcsbtu ;Q (r"   c                    | j                   }|j                  d|j                  dd             }h d}t        |j                               }t	        ||||       |d   }|t        |t              r|dk  rt        j                  d|        |d   }|d	   }|t        |t              st        j                  d
|        |t        |t              st        j                  d|        ||k  rt        j                  d| d|        |d   }	|	t        |	t              st        j                  d|	        |	| j                  k\  r&t        j                  d|	 d| j                          y y )Nr+   r   >   rF   r+   r}   r~   r   r   rF   r3   r   r}   r~   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=r   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)rG   ro   r   r   r   rK   r@   r   r   r=   r   )
r   r   rG   r+   r   r   rF   r}   r~   r   s
             r    _validate_llama3_parametersr   &  s   &&L  l.>.>vt.LMIvM))+,MM=kZ(#F~Z6&3,QRXQYZ["#45O#$67j%&HUVeUfghz2BE'JVWgVhij?*q  5o5FH	

 (44V'W$'/zBbdg7h^/02	
 (6+I+IIu/00MfNlNlMmo	
 Jr"   c                     t        | dd      }|y|j                  d|j                  dd            }t        j                  |      }| || |       yt        j	                  d| d       y)	zO
    Validate the RoPE config arguments, given a `PretrainedConfig` object
    rG   Nr+   r   r   r   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r:   ro   ROPE_VALIDATION_FUNCTIONSr   r   )r   r   rG   r+   validation_fns        r    rope_config_validationr   U  sw     6>48L   l.>.>vy.QRI-11)<M f+6bclbmmno	
r"   )NNNr   )NN)$rT   	functoolsr   typingr   configuration_utilsr   utilsr   r	   
get_logger__name__r   r   r/   r=   tupler@   rD   rH   rO   rt   r{   r   ROPE_INIT_FUNCTIONSstrr   r   r   r   r   r   r   r   r   r    r"   r    <module>r      s      1 . 
		H	% ;~ *.'+!&%&&^$& c]& >5 !	&> *.'+!&%&&^$& c]& >5 !	&> *.'+!*&%&*&^$*& c]*& >5 !	*&\ PTZ&Z&&4Z&?G}Z&
>5 !Z&| PT3&3&&43&?G}3&
>5 !3&n PT&,&,&4&,?G}&,
>5 !&,Z 05.$,(  $(!%lll l C=	l
 #l:[.> [XVY] [	\5E 	\T\]`Ta 	\\6F \U]^aUb \$
&6 $
Xc] $
N/*: /RU /d!
(8 !
xPS} !
L 168%-) 
#3 
(3- 
r"   