
    rh[                        d Z ddlZddlZddlZddlZddlZddlmZmZm	Z	 ddl
mZmZ ddlmZmZ ddlmZmZmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZ ddlZ dd
l!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, i Z- e$j\                  e/      Z0 e*       r
ddl1Z1ddl2m3Z3  G d de4      Z5d Z6d Z7d Z8d Z9d Z:d Z;d Z<d Z=d Z>d Z?d Z@d ZAd ZBd ZCd  ZDd! ZEd" ZFd# ZGd$ ZHd% ZId& ZJd' ZK G d( d)e      ZL e*       rddlMm#c mNZO d*eLd+ePeQe   d,f   fd-ZR	 ded.ee   d/d,d+eLfd0ZS e"j                   e&              e"j                  d1      k\  r8 eOj                  eLeR eeSeL2      eLj                   d3eLj^                   4       n eOj                  eLeR eeSeL2              G d5 d6eXe      ZY G d7 d8eY      ZZ G d9 d:eY      Z[ G d; d<      Z\d= Z]d> Z^dfd?ed@eXdAeXfdBZ_edgdDe`fdE       ZadedFZbdG ZcdedHZddI ZedJ ZfdK ZgdL ZhdM ZidedNeeQ   fdOZj G dP dQedCR      ZkdSeleXef   d+e`fdTZmdUeXd+e`fdVZndWdXdYeXdZefd[ZodWdXdYeXfd\Zpd] Zqe e,d^_       G d` da                    Zrdb Zs G dc dde      Zty)hz
Generic utilities
    N)OrderedDictUserDictdefaultdict)IterableMutableMapping)	ExitStackcontextmanager)	dataclassfieldsis_dataclass)Enum)partialwraps)AnyCallableContextManagerOptional	TypedDict)version   )logging   )get_torch_versionis_flax_availableis_mlx_availableis_tf_availableis_torch_availableis_torch_fx_proxyrequires)model_addition_debugger_contextc                       e Zd ZdZddZy)cached_propertyz
    Descriptor that mimics @property but caches output in member variable.

    From tensorflow_datasets

    Built-in in functools from Python 3.8.
    Nc                     || S | j                   t        d      d| j                   j                  z   }t        ||d       }|| j                  |      }t	        |||       |S )Nzunreadable attribute	__cached_)fgetAttributeError__name__getattrsetattr)selfobjobjtypeattrcacheds        m/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/utils/generic.py__get__zcached_property.__get__C   sg    ;K99 !788TYY///dD)>YYs^FCv&    N)r'   
__module____qualname____doc__r0    r1   r/   r"   r"   :   s    r1   r"   c                 R    | j                         } | dv ry| dv ryt        d|       )zConvert a string representation of truth to true (1) or false (0).

    True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false', 'off', and '0'.
    Raises ValueError if 'val' is anything else.
    >   1tyonyestruer   >   0fnnoofffalser   zinvalid truth value )lower
ValueError)vals    r/   	strtoboolrG   R   s:     ))+C
22
33
+C73
44r1   c                     t        t        |             }|j                  d      ry|j                  d      ry|j                  d      ry|j                  d      ry|j                  d	      ry
y)z
    Tries to guess the framework of an object `x` from its repr (brittle but will help in `is_tensor` to try the
    frameworks in a smart order, without the need to import the frameworks).
    z<class 'torch.ptz<class 'tensorflow.tfz<class 'jaxjaxz<class 'numpy.npz<class 'mlx.mlxN)strtype
startswith)xrepresentations     r/   infer_framework_from_reprrS   `   sn    
 a\N  !12		"	"#8	9		"	"=	1		"	"#3	4		"	">	2 
3r1   c                    t         t        t        t        t        d}t        |       }|g n|g}|dk7  r|j                  d       |j                  |D cg c]  }||dfvs
| c}       |D ci c]  }|||   
 c}S c c}w c c}w )z
    Returns an (ordered since we are in Python 3.7+) dictionary framework to test function, which places the framework
    we can guess from the repr first, then Numpy, then the others.
    )rI   rJ   rK   rL   rM   rL   )is_torch_tensoris_tf_tensoris_jax_tensoris_numpy_arrayis_mlx_arrayrS   appendextend)rQ   framework_to_testpreferred_framework
frameworksr?   s        r/   _get_frameworks_and_test_funcr_   r   s      4A6*29L8MJd"$"3\QqATVZ@[7[q\]-78A ##88 ]8s   A? A?/Bc                     t        |       }|j                         D ]  } ||       s y t        |       ryt               rddlm} t        | |      ryy)z
    Tests if `x` is a `torch.Tensor`, `tf.Tensor`, `jaxlib.xla_extension.DeviceArray`, `np.ndarray` or `mlx.array`
    in the order defined by `infer_framework_from_repr`
    Tr   )TracerF)r_   valuesr   r   jax.corera   
isinstance)rQ   framework_to_test_func	test_funcra   s       r/   	is_tensorrg      sW     ;1=+224 	Q<
 #a r1   c                 6    t        | t        j                        S r2   )rd   rL   ndarrayrQ   s    r/   	_is_numpyrk      s    a$$r1   c                     t        |       S )z/
    Tests if `x` is a numpy array or not.
    )rk   rj   s    r/   rX   rX      s     Q<r1   c                 6    dd l }t        | |j                        S Nr   )torchrd   TensorrQ   ro   s     r/   	_is_torchrr          a&&r1   c                 0    t               sdS t        |       S )z]
    Tests if `x` is a torch tensor or not. Safe to call even if torch is not installed.
    F)r   rr   rj   s    r/   rU   rU      s     +,5>)A,>r1   c                 6    dd l }t        | |j                        S rn   )ro   rd   devicerq   s     r/   _is_torch_devicerw      rs   r1   c                 0    t               sdS t        |       S )z]
    Tests if `x` is a torch device or not. Safe to call even if torch is not installed.
    F)r   rw   rj   s    r/   is_torch_devicery      s     +,5E2B12EEr1   c                     dd l }t        | t              rt        ||       rt	        ||       } nyt        | |j
                        S )Nr   F)ro   rd   rN   hasattrr(   dtyperq   s     r/   _is_torch_dtyper}      s9    !S5!q!Aa%%r1   c                 0    t               sdS t        |       S )z\
    Tests if `x` is a torch dtype or not. Safe to call even if torch is not installed.
    F)r   r}   rj   s    r/   is_torch_dtyper      s     +,5D/!2DDr1   c                 6    dd l }t        | |j                        S rn   )
tensorflowrd   rp   rQ   rJ   s     r/   _is_tensorflowr      s    a##r1   c                 0    t               sdS t        |       S )zg
    Tests if `x` is a tensorflow tensor or not. Safe to call even if tensorflow is not installed.
    F)r   r   rj   s    r/   rV   rV      s     ()5@~a/@@r1   c                 p    dd l }t        |d      r|j                  |       S t        | |j                        S )Nr   is_symbolic_tensor)r   r{   r   rd   rp   r   s     r/   _is_tf_symbolic_tensorr      s4     r'($$Q''a##r1   c                 0    t               sdS t        |       S )z
    Tests if `x` is a tensorflow symbolic tensor or not (ie. not eager). Safe to call even if tensorflow is not
    installed.
    F)r   r   rj   s    r/   is_tf_symbolic_tensorr      s    
 ()5H/Ea/HHr1   c                 :    dd l m} t        | |j                        S rn   )	jax.numpynumpyrd   ri   )rQ   jnps     r/   _is_jaxr      s    a%%r1   c                 0    t               sdS t        |       S )zY
    Tests if `x` is a Jax tensor or not. Safe to call even if jax is not installed.
    F)r   r   rj   s    r/   rW   rW      s     *+5;;r1   c                 :    dd l m} t        | |j                        S rn   )mlx.corecorerd   array)rQ   mxs     r/   _is_mlxr     s    a""r1   c                 0    t               sdS t        |       S )zZ
    Tests if `x` is a mlx array or not. Safe to call even when mlx is not installed.
    F)r   r   rj   s    r/   rY   rY     s     )*5:
:r1   c                 .   t        | t        t        f      r| S t        | t        t        f      r-| j                         D ci c]  \  }}|t        |       c}}S t        | t        t        f      r	 t        j                  |       }t        j                  |j                  t        j                        s.t        j                  |j                  t        j                        r|j                         S 	 | D cg c]  }t        |       c}S d d d d d}t#        |       }|j                         D ]  \  }} ||       s ||   |       c S  t        | t        j$                        r| j                         S | S c c}}w # t         $ r Y w xY wc c}w )zc
    Convert a TensorFlow tensor, PyTorch tensor, Numpy array or python list to a python list.
    c                 "    | j                         S r2   tolistr+   s    r/   <lambda>zto_py_obj.<locals>.<lambda>       #**, r1   c                 >    | j                         j                         S r2   )r   r   r   s    r/   r   zto_py_obj.<locals>.<lambda>!  s    #))+,,. r1   c                 H    t        j                  |       j                         S r2   )rL   asarrayr   r   s    r/   r   zto_py_obj.<locals>.<lambda>"  s    2::c?113 r1   c                 "    | j                         S r2   r   r   s    r/   r   zto_py_obj.<locals>.<lambda>#  r   r1   rI   rJ   rK   rL   )rd   intfloatdictr   items	to_py_objlisttuplerL   r   
issubdtyper|   integerfloatingr   	Exceptionr_   number)	r+   kvarroframework_to_py_objre   	frameworkrf   s	            r/   r   r     sV    #U|$
	C$)	*,/IIK8DAq9Q<88	C$	'	((3-C}}SYY

3r}}SYYPRP[P[7\zz|# 8] '**	!** '.3&	 ;3? 6 < < > 7	9S>1&y1#667
 #ryy!zz|
7 9  		*s   E=3B F 9F	FFc                 t   d d d d d}t        | t        t        f      r-| j                         D ci c]  \  }}|t	        |       c}}S t        | t
        t        f      rt        j                  |       S t        |       }|j                         D ]  \  }} ||       s ||   |       c S  | S c c}}w )zc
    Convert a TensorFlow tensor, PyTorch tensor, Numpy array or python list to a Numpy array.
    c                 Z    | j                         j                         j                         S r2   )detachcpur   r   s    r/   r   zto_numpy.<locals>.<lambda>9  s    #**,**,224 r1   c                 "    | j                         S r2   )r   r   s    r/   r   zto_numpy.<locals>.<lambda>:  s    #))+ r1   c                 ,    t        j                  |       S r2   )rL   r   r   s    r/   r   zto_numpy.<locals>.<lambda>;  s    2::c? r1   c                     | S r2   r6   r   s    r/   r   zto_numpy.<locals>.<lambda><  s    # r1   r   )
rd   r   r   r   to_numpyr   r   rL   r   r_   )r+   framework_to_numpyr   r   re   r   rf   s          r/   r   r   3  s     5%*	 #h'(+.99;741a8A;77	C$	'xx} ;3? 6 < < > 6	9S>0%i0556 J 8s   B4c                   ~     e Zd ZdZddZ fdZd Zd Zd Zd Z	d	 Z
d
 Z fdZ fdZ fdZdee   fdZ xZS )ModelOutputa  
    Base class for all model outputs as dataclass. Has a `__getitem__` that allows indexing by integer or slice (like a
    tuple) or strings (like a dictionary) that will ignore the `None` attributes. Otherwise behaves like a regular
    python dictionary.

    <Tip warning={true}>

    You can't unpack a `ModelOutput` directly. Use the [`~utils.ModelOutput.to_tuple`] method to convert it to a tuple
    before.

    </Tip>
    returnc                 D   t               rt        j                  t                     t        j                  d      k\  r?ddlm}  || t        t        t        |       | j                   d| j                          yddlm}  || t        t        t        |              yy)	zRegister subclasses as pytree nodes.

        This is necessary to synchronize gradients when using `torch.nn.parallel.DistributedDataParallel` with
        `static_graph=True` with modules that output `ModelOutput` subclasses.
        2.2r   )register_pytree_nodeoutput_type.serialized_type_name)_register_pytree_nodeN)r   r   parser   torch.utils._pytreer   _model_output_flattenr   _model_output_unflattenr3   r'   r   )clsr   r   s      r/   __init_subclass__zModelOutput.__init_subclass__[  s     }}.01W]]55IID$)3E,/NN+;1S\\N)K	 F%)3E  r1   c                     t        |   |i | | j                  t        k7  }|r;t	        |       s/t        | j                   d| j                  j                   d      y y )Nr   z` is not a dataclass. This is a subclass of ModelOutput and so must use the @dataclass decorator.)super__init__	__class__r   r   	TypeErrorr3   r'   )r*   argskwargsis_modeloutput_subclassr   s       r/   r   zModelOutput.__init__t  sl    $)&) #'..K"?"<+=??#1T^^%<%<$= >_ _  ,>"r1   c                     t               }t        |      s"t         j                  j                   d      t        d |dd D              s"t         j                  j                   d      t         |d   j                        }t         fd|dd D              }|rt        |      st        |t              r|j                         }d}n	 t        |      }d}|rt              D ]  \  }}t        |t        t         f      r!t        |      d
k7  st        |d   t"              s)|dk(  r| |d   j                  <   nt        d| d       yt%         |d   |d          |d   |d    |d   <    y|| |d   j                  <   yy|D ]*  }t         |j                        }	|	|	 |j                  <   , y# t        $ r d	}Y w xY w)zeCheck the ModelOutput dataclass.

        Only occurs if @dataclass decorator has been used.
        z has no fields.c              3   8   K   | ]  }|j                   d u   y wr2   )default).0fields     r/   	<genexpr>z,ModelOutput.__post_init__.<locals>.<genexpr>  s     GU5==D(Gs   r   Nz. should not have more than one required field.r   c              3   N   K   | ]  }t        |j                        d u   y wr2   r(   namer   r   r*   s     r/   r   z,ModelOutput.__post_init__.<locals>.<genexpr>  s!     #d%GD%**$=$E#ds   "%TFr   zCannot set key/value for z&. It needs to be a tuple (key, value).)r   lenrE   r   r'   allr(   r   rg   rd   r   r   iterr   	enumerater   r   rN   r)   )
r*   class_fieldsfirst_fieldother_fields_are_noneiteratorfirst_field_iteratoridxelementr   r   s
   `         r/   __post_init__zModelOutput.__post_init__  s   
 d| <  7 78HIIGl126FGG 7 788fghhdLO$8$89 ##dS_`a`bSc#d d ;)?+t,&,,.'+$1#K0H+/( $$-h$7 6LC%ge}=WQRARZdelmneoqtZu!89DDa!5!56 #-";G9Dj k#  D'!*gaj9qz-+21:WQZ(6 (-8\!_))* ) & )D%**-='(D$)/ ! 1+0(1s   F? ?GGc                 H    t        d| j                  j                   d      )Nz$You cannot use ``__delitem__`` on a 
 instance.r   r   r'   r*   r   r   s      r/   __delitem__zModelOutput.__delitem__  s#    >t~~?V?V>WWabccr1   c                 H    t        d| j                  j                   d      )Nz#You cannot use ``setdefault`` on a r   r   r   s      r/   
setdefaultzModelOutput.setdefault  s#    =dnn>U>U=VV`abbr1   c                 H    t        d| j                  j                   d      )NzYou cannot use ``pop`` on a r   r   r   s      r/   popzModelOutput.pop  s"    6t~~7N7N6OzZ[[r1   c                 H    t        d| j                  j                   d      )NzYou cannot use ``update`` on a r   r   r   s      r/   updatezModelOutput.update  s#    9$..:Q:Q9RR\]^^r1   c                     t        |t              rt        | j                               }||   S | j	                         |   S r2   )rd   rN   r   r   to_tuple)r*   r   
inner_dicts      r/   __getitem__zModelOutput.__getitem__  s7    adjjl+Ja= ==?1%%r1   c                 n    || j                         v r|t        | 	  ||       t        |   ||       y r2   )keysr   __setitem____setattr__)r*   r   valuer   s      r/   r   zModelOutput.__setattr__  s4    499;5#4Ge,D%(r1   c                 F    t         |   ||       t         | 	  ||       y r2   )r   r   r   )r*   keyr  r   s      r/   r   zModelOutput.__setitem__  s!    C'C'r1   c                      t               st         	         S t         	         ^}}}t         fdt	               D              }||g|S )Nc              3   J   K   | ]  }t        |j                          y wr2   r   r   s     r/   r   z)ModelOutput.__reduce__.<locals>.<genexpr>  s     I5WT5::.Is    #)r   r   
__reduce__r   r   )r*   callable_args	remainingr   r   s   `    r/   r  zModelOutput.__reduce__  sQ    D!7%''&+g&8&:#%)IF4LII)	))r1   c                 H     t         fd j                         D              S )za
        Convert self to a tuple containing all the attributes/keys that are not `None`.
        c              3   (   K   | ]	  }|     y wr2   r6   )r   r   r*   s     r/   r   z'ModelOutput.to_tuple.<locals>.<genexpr>  s     2T!W2s   )r   r   r*   s   `r/   r   zModelOutput.to_tuple  s     2diik222r1   )r   N)r'   r3   r4   r5   r   r   r   r   r   r   r   r   r   r   r  r   r   r   __classcell__)r   s   @r/   r   r   M  sT    22)hdc\_&)(*3%* 3r1   r   outputr   z_torch_pytree.Contextc                 f    t        | j                               t        | j                               fS r2   )r   rb   r   )r  s    r/   r   r     s#    FMMO$d6;;=&999r1   rb   contextc           
      8     |di t        t        ||             S )Nr6   )r   zip)rb   r  r   s      r/   r   r     s    
 8T#gv"6788r1   r   r   r   r   c                        e Zd ZdZed        Zy)ExplicitEnumzC
    Enum with more explicit error message for missing values.
    c           
      ~    t        | d| j                   dt        | j                  j	                                      )Nz is not a valid z, please select one of )rE   r'   r   _value2member_map_r   )r   r  s     r/   	_missing_zExplicitEnum._missing_  s?    g%cll^3J4PSPfPfPkPkPmKnJop
 	
r1   N)r'   r3   r4   r5   classmethodr  r6   r1   r/   r  r     s     
 
r1   r  c                       e Zd ZdZdZdZdZy)PaddingStrategyz
    Possible values for the `padding` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for tab-completion in an
    IDE.
    longest
max_length
do_not_padN)r'   r3   r4   r5   LONGEST
MAX_LENGTH
DO_NOT_PADr6   r1   r/   r  r    s    
 GJJr1   r  c                   $    e Zd ZdZdZdZdZdZdZy)
TensorTypez
    Possible values for the `return_tensors` argument in [`PreTrainedTokenizerBase.__call__`]. Useful for
    tab-completion in an IDE.
    rI   rJ   rL   rK   rM   N)	r'   r3   r4   r5   PYTORCH
TENSORFLOWNUMPYJAXMLXr6   r1   r/   r"  r"    s"    
 GJE
C
Cr1   r"  c                   .    e Zd ZdZdee   fdZd Zd Zy)ContextManagersz
    Wrapper for `contextlib.ExitStack` which enters a collection of context managers. Adaptation of `ContextManagers`
    in the `fastcore` library.
    context_managersc                 0    || _         t               | _        y r2   )r*  r   stack)r*   r*  s     r/   r   zContextManagers.__init__*  s     0[
r1   c                 \    | j                   D ]  }| j                  j                  |        y r2   )r*  r,  enter_context)r*   context_managers     r/   	__enter__zContextManagers.__enter__.  s)    #44 	6OJJ$$_5	6r1   c                 <     | j                   j                  |i | y r2   )r,  __exit__r   s      r/   r2  zContextManagers.__exit__2  s    

T,V,r1   N)	r'   r3   r4   r5   r   r   r   r0  r2  r6   r1   r/   r)  r)  $  s"    
!n)= !6-r1   r)  c                 T   t        |       }|dk(  r t        j                  | j                        }nD|dk(  r t        j                  | j                        }nt        j                  | j
                        }|j                  D ]%  }|dk(  s	|j                  |   j                  du s% y y)zr
    Check if a given model can return loss.

    Args:
        model_class (`type`): The class of the model.
    rJ   rI   return_lossTF)infer_frameworkinspect	signaturecallforward__call__
parametersr   )model_classr   r7  ps       r/   can_return_lossr>  6  s      ,ID%%k&6&67		d	%%k&9&9:	%%k&:&:;	!! )"6"6q"9"A"AT"I r1   c                    | j                   }t        |       }|dk(  r t        j                  | j                        }nD|dk(  r t        j                  | j
                        }nt        j                  | j                        }d|v r#|j                  D cg c]  }d|v s|dv s| c}S |j                  D cg c]	  }d|v s| c}S c c}w c c}w )zq
    Find the labels used by a given model.

    Args:
        model_class (`type`): The class of the model.
    rJ   rI   QuestionAnsweringlabel)start_positionsend_positions)r'   r5  r6  r7  r8  r9  r:  r;  )r<  
model_namer   r7  r=  s        r/   find_labelsrE  L  s     %%J,ID%%k&6&67		d	%%k&9&9:	%%k&:&:;	j($//ma7a<1HlClmm$//@a7a<@@ n@s   C!C6	C Cd
parent_key	delimiterc                 0    dd}t         || ||            S )z/Flatten a nested dict into a single level dict.c              3      K   | j                         D ]`  \  }}|rt        |      |z   t        |      z   n|}|r5t        |t              r%t	        |||      j                         E d {    [||f b y 7 w)N)rH  )r   rN   rd   r   flatten_dict)rF  rG  rH  r   r   r  s         r/   _flatten_dictz#flatten_dict.<locals>._flatten_dicte  sp     GGI 	DAq:D#j/I-A6!CZ>2'3)DJJLLL1f	 Ms   A&A9(A7)A9 r   )r   )rF  rG  rH  rL  s       r/   rK  rK  b  s     aY788r1   Fuse_temp_dirc              #   t   K   |r"t        j                         5 }| d d d        y |  y # 1 sw Y   y xY wwr2   )tempfileTemporaryDirectory)working_dirrO  tmp_dirs      r/   working_or_temp_dirrU  p  s=     ((* 	gM	 	 	 	s   8,858c                 R   t        |       rt        j                  | |      S t        |       r|| j                  S  | j
                  | S t        |       rddl}|j                  | |      S t        |       rddl	m
} |j                  | |      S t        dt        |        d      )z
    Framework-agnostic version of `numpy.transpose` that will work on torch/TensorFlow/Jax tensors as well as NumPy
    arrays.
    )axesNr   )permz"Type not supported for transpose: r   )rX   rL   	transposerU   TpermuterV   r   rW   r   r   rE   rO   )r   rW  rJ   r   s       r/   rY  rY  y  s    
 e||E--		,uww@MEMM4,@@	e	||E|--	u	}}U}..=d5k]!LMMr1   c                 0   t        |       rt        j                  | |      S t        |       r | j                  | S t	        |       rddl}|j                  | |      S t        |       rddlm} |j                  | |      S t        dt        |        d      )z
    Framework-agnostic version of `numpy.reshape` that will work on torch/TensorFlow/Jax tensors as well as NumPy
    arrays.
    r   Nz Type not supported for reshape: r   )rX   rL   reshaperU   rV   r   rW   r   r   rE   rO   )r   newshaperJ   r   s       r/   r]  r]    s    
 ezz%**		u}}h''	e	zz%**	u	{{5(++;DK=JKKr1   c                 `   t        |       rt        j                  | |      S t        |       r$|| j                         S | j                  |      S t	        |       rddl}|j                  | |      S t        |       rddlm} |j                  | |      S t        dt        |        d      )z
    Framework-agnostic version of `numpy.squeeze` that will work on torch/TensorFlow/Jax tensors as well as NumPy
    arrays.
    axisNdimr   z Type not supported for squeeze: r   )rX   rL   squeezerU   rV   r   rW   r   r   rE   rO   r   ra  rJ   r   s       r/   rd  rd    s    
 ezz%d++		"&,u}}KEMMdM4KK	e	zz%dz++	u	{{5t{,,;DK=JKKr1   c                 :   t        |       rt        j                  | |      S t        |       r| j	                  |      S t        |       rddl}|j                  | |      S t        |       rddlm	} |j                  | |      S t        dt        |        d      )z
    Framework-agnostic version of `numpy.expand_dims` that will work on torch/TensorFlow/Jax tensors as well as NumPy
    arrays.
    rb  r   Nr`  z$Type not supported for expand_dims: r   )rX   rL   expand_dimsrU   	unsqueezerV   r   rW   r   r   rE   rO   re  s       r/   rg  rg    s    
 e~~eT**		4((	e	~~e$~//	u	u400?U}ANOOr1   c                    t        |       rt        j                  |       S t        |       r| j	                         S t        |       rddl}|j                  |       S t        |       r| j                  S t        dt        |        d      )z|
    Framework-agnostic version of `numpy.size` that will work on torch/TensorFlow/Jax tensors as well as NumPy arrays.
    r   Nz$Type not supported for tensor_size: r   )
rX   rL   sizerU   numelrV   r   rW   rE   rO   )r   rJ   s     r/   tensor_sizerl    sr     ewwu~		{{}	e	wwu~	u	zz?U}ANOOr1   c                 X   t        j                  |       D ]  }|j                  }|j                  }|j	                  d      s|j	                  d      s|dk(  r y|j	                  d      s|dk(  r y|j	                  d      s|j	                  d	      s|d
k(  s y t        d|  d      )z
    Infers the framework of a given model without using isinstance(), because we cannot guarantee that the relevant
    classes are imported or available.
    r   kerasTFPreTrainedModelrJ   ro   PreTrainedModelrI   flaxrK   FlaxPreTrainedModelz%Could not infer framework from class r   )r6  getmror3   r'   rP   r   )r<  
base_classmoduler   s       r/   r5  r5    s    
 nn[1 
P
&&""\*f.?.?.HDTgLgw'43D+Dv&&*;*;E*BdNcFc
P ?}ANOOr1   c                     t               st        |       S ddl}|j                  j	                         r1t        | |j                        r| j                  |j                        S t        |       S )zk
    Casts an input to a torch int64 tensor if we are in a tracing context, otherwise to a Python int.
    r   N)	r   r   ro   jit
is_tracingrd   rp   toint64rq   s     r/   	torch_intr{    sQ     1v %		 4 4 6:a;V144b\_`a\bbr1   c                     t               st        |       S ddl}|j                  j	                         r1t        | |j                        r| j                  |j                        S t        |       S )zo
    Casts an input to a torch float32 tensor if we are in a tracing context, otherwise to a Python float.
    r   N)	r   r   ro   rw  rx  rd   rp   ry  float32rq   s     r/   torch_floatr~    sQ     1v"'))"6"6"8Z5<<=X144d^abc^ddr1   extrac                 4    | xs g } t        |       fd}|S )aI  
    Decorator to filter out named arguments that are not in the function signature.

    This decorator ensures that only the keyword arguments that match the function's signature, or are specified in the
    `extra` list, are passed to the function. Any additional keyword arguments are filtered out and a warning is issued.

    Parameters:
        extra (`Optional[list]`, *optional*):
            A list of extra keyword argument names that are allowed even if they are not in the function's signature.

    Returns:
        Callable:
            A decorator that wraps the function and filters out invalid keyword arguments.

    Example usage:

        ```python
        @filter_out_non_signature_kwargs(extra=["allowed_extra_arg"])
        def my_function(arg1, arg2, **kwargs):
            print(arg1, arg2, kwargs)

        my_function(arg1=1, arg2=2, allowed_extra_arg=3, invalid_arg=4)
        # This will print: 1 2 {"allowed_extra_arg": 3}
        # And issue a warning: "The following named arguments are not valid for `my_function` and were ignored: 'invalid_arg'"
        ```
    c                      t        j                         }t        |j                  j	                               }|j                        d|v d|v d _        t                fd       }|S )Nr*   r   Tc                     i }i }|j                         D ]  \  }}|v r|||<   |||<    |r|D cg c]  }d| d
 }}dj                  |      }
r| d   j                  j                  dz   }n	r| d   j                  dz   }nd}t	        j
                  d| j                   d| t        d	        | i |S c c}w )
N'z, r   r   rN  z1The following named arguments are not valid for `z` and were ignored: r   )
stacklevel)r   joinr   r'   warningswarnUserWarning)r   r   valid_kwargsinvalid_kwargsr   r   invalid_kwargs_names
cls_prefixfuncis_class_methodis_instance_methodvalid_kwargs_to_passs           r/   wrapperzCfilter_out_non_signature_kwargs.<locals>.decorator.<locals>.wrapper3  s    LN *1,,&'LO()N1%	* :H'IQ!A3a'I$'I'+yy1E'F$ &!%a!2!2!;!;c!AJ$!%a!1!1C!7J!#JG
|TXTaTaSb c**>)?A 	 ...% (Js   B?)r6  r7  setr;  r   union _filter_out_non_signature_kwargsr   )r  sigfunction_named_argsr  r  r  r  extra_params_to_passs   `   @@@r/   	decoratorz2filter_out_non_signature_kwargs.<locals>.decorator'  s}    %!#.."5"5"782889MN $'::#66 15-	t	/ 
	/> r1   )r  )r  r  r  s     @r/   filter_out_non_signature_kwargsr  	  s&    6 KREu:,\ r1   c                       e Zd ZU dZed   ed<   ee   ed<   ee   ed<   ee   ed<   ed   ed<   ed   ed	<   ee   ed
<   ee   ed<   y)TransformersKwargsav  
    Keyword arguments to be passed to the loss function

    Attributes:
        num_items_in_batch (`Optional[torch.Tensor]`, *optional*):
            Number of items in the batch. It is recommended to pass it when
            you are doing gradient accumulation.
        output_hidden_states (`Optional[bool]`, *optional*):
            Most of the models support outputing all hidden states computed during the forward pass.
        output_attentions (`Optional[bool]`, *optional*):
            Turn this on to return the intermediary attention scores.
        output_router_logits (`Optional[bool]`, *optional*):
            For MoE models, this allows returning the router logits to compute the loss.
        cumulative_seqlens_q (`torch.LongTensor`, *optional*)
            Gets cumulative sequence length for query state.
        cumulative_seqlens_k (`torch.LongTensor`, *optional*)
            Gets cumulative sequence length for key state.
        max_length_q (`int`, *optional*):
            Maximum sequence length for query state.
        max_length_k (`int`, *optional*):
            Maximum sequence length for key state.
    ztorch.Tensornum_items_in_batchoutput_hidden_statesoutput_attentionsoutput_router_logitsztorch.LongTensorcumulative_seqlens_qcumulative_seqlens_kmax_length_qmax_length_kN)r'   r3   r4   r5   r   __annotations__boolr   r6   r1   r/   r  r  X  s^    . !00"4.(~%"4.("#566"#5663-3-r1   r  )totalconfig_dictc                 
    d| v S )z3Checks whether a config dict is a timm config dict.pretrained_cfgr6   )r  s    r/   is_timm_config_dictr  z  s    {**r1   pretrained_model_pathc                    | yt        |       } t        j                  j                  |       }t        j                  j	                  |       }|rE| j                  d      r4t        |       5 }t        j                  |      }ddd       t        |      S |rt        j                  j                  t        j                  j                  | d            rRt        t        j                  j                  | d            5 }t        j                  |      }ddd       t        |      S y# 1 sw Y   t              S xY w# 1 sw Y   t              S xY w)zA
    Checks whether a checkpoint is a timm model checkpoint.
    NFz.jsonzconfig.json)rN   ospathisfileisdirendswithopenjsonloadr  existsr  )r  is_fileis_dirr?   r  s        r/   is_timm_local_checkpointr    s    $   56ggnn23GWW]]01F (11':'( 	'A))A,K	'";// "''...C]!ST"'',,4mDE 	'))A,K	'";//	'";//	'";//s   +D&<D<&D9<Eru  ztorch.nn.Moduler  r  c                 b    t        | ||       | j                         D ]  }t        |||        y)z5
    Set a value to a module and all submodules.
    N)r)   childrenset_attribute_for_modules)ru  r  r  	submodules       r/   r  r    s2     FC__& 9	!)S%89r1   c                 v    t        | |      rt        | |       | j                         D ]  }t        ||        y)z:
    Delete a value from a module and all submodules.
    N)r{   delattrr  del_attribute_from_modules)ru  r  r  s      r/   r  r    s8    
 vs__& 3	"9c23r1   c                 .     t                fd       }|S )z
    Decorator to wrap model method, to call output.to_tuple() if return_dict=False passed as a kwarg or
    use_return_dict=False is set in the config.

    Note:
        output.to_tuple() convert output to tuple skipping all `None` values.
    c                     t        | d      r| j                  j                  nd}|j                  d|      }||} | g|i |}|s t	        |t
              s|j                         }|S )NconfigTreturn_dict)r{   r  r  r   rd   r   r   )r*   r   r   r  return_dict_passedr  r  s         r/   r  z!can_return_tuple.<locals>.wrapper  sj    18x1Hdkk--d#ZZ{C),Kd,T,V,:fe#<__&Fr1   r   r  r  s   ` r/   can_return_tupler    s"     4[  Nr1   )ro   )backendsc                   X    e Zd ZU dZded<   dZee   ed<   dZee	   ed<   dZ
ee	   ed<   y)	OutputRecordera  
    Configuration for recording outputs from a model via hooks.

    Attributes:
        target_class (Type): The class (e.g., nn.Module) to which the hook will be attached.
        index (Optional[int]): If the output is a tuple/list, optionally record only at a specific index.
        layer_name (Optional[str]): Name of the submodule to target (if needed), e.g., "transformer.layer.3.attn".
        class_name (Optional[str]): Name of the class to which the hook will be attached. Could be the suffix of class name in some cases.
    ztype[torch.nn.Module]target_classr   indexN
layer_name
class_name)r'   r3   r4   r5   r  r  r   r   r  rN   r  r6   r1   r/   r  r    s9     *)E8C= $J$ $J$r1   r  c                 .     t                fd       }|S )z
    Decorator to intercept specific layer outputs without using hooks.
    Compatible with torch.compile (Dynamo tracing).
    c                    |j                  d      }|t        | j                  dd      }|j                  dd       }|t        | j                  dd      }t        | dd      r%| j                  r|rt
        j                  d       d}||d<   |j                         }d|v r |d   j                         D ]
  \  }}|||<    t        j                  t        | j                        i       }|D ci c]X  }d| |j                  d| t        | j                  d| |j                  d	t        | j                  d	d                        Z }	}t        t              g }
fd
}t        |	j                               rlg }|j                         D ]  \  }}|	j                  d| d      st!        |t"              s|g}|D ]c  }t!        |t$              s>d|v rdnd}t!        |t              sd n|}t!        |t              s|nd }t%        |||      }|j'                  ||f       e  | j)                         D ]  \  }}|D ]  \  }}|j*                  t!        ||j*                        s)|j,                  5|j/                  |j,                        sQ|j0                  |j0                  |vrl|j2                  } |||||j4                        |_        |
j'                  ||f          | g|i |}|
D ]  \  }}||_         D ]  }|dk(  r]|   d d |<   t7        |d      r|xx   |j8                  fz  cc<   n$t7        |d      r|xx   |j:                  fz  cc<   |   ||<   e|dk(  rMt!        ||   t"              r1t=        ||         dk(  r |   dd d   ||<   |   dd d   |d|z   <   |   ||<   |   ||<    |du r|j?                         }|S c c}w )N	use_cacheFr  Tgradient_checkpointingzX`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.r   output_r  c                 >     t               fd       }|S )Nc                     dk(  r"t                 dk(  rxx   | d   fz  cc<   |j                  dd      r>t        |j                  dd      |j                  d            5   | i |}d d d        n | i |}t        t              sxx   |fz  cc<   |S |    vr|   f<   |S xx   |   fz  cc<   |S # 1 sw Y   PxY w)Nhidden_statesr   debug_ioFdebug_io_dirz~/model_debugprune_layers)r   getr    rd   r   )r   r   r  collected_outputsr  r  ru  orig_forwards      r/   wrapped_forwardzZcheck_model_inputs.<locals>.wrapper.<locals>.make_capture_wrapper.<locals>.wrapped_forward	  s   /)c2CC2H.IQ.N%c*tAwj8*::j%08

>? KVZZXfMg ? ".t!>v!>? ?
 *4:6:F!&%0%c*vi7*  E]."3328-1A)#.  *#.6%=2BB.? ?s   '	CCr  )ru  r  r  r  r  r  s   ```` r/   make_capture_wrapperzAcheck_model_inputs.<locals>.wrapper.<locals>.make_capture_wrapper  s"    <  !& #"r1   r  r   r   )r  r  r  vision_hidden_stateslast_hidden_state
attentionsr   cross_) r  r(   r  r   trainingloggerwarning_oncecopyr   _CAN_RECORD_REGISTRYrN   r   r   r   anyrb   rd   r   r  rZ   named_modulesr  r  r  r  r9  r  r{   r  r  r   r   )r*   r   r   r  r  all_argsr   r   capture_flagsrecordable_keysmonkey_patched_layersr  capture_tasksr  layer_specsspecsr  r  r  r   ru  original_forwardoutputsr  r  s                          @r/   r  z#check_model_inputs.<locals>.wrapper  sT   JJ{+	[%@Ijj5!$++}dCK4159dmmPYj I'{;;=x *002  1  -00T^^1DbI #

  aSM8<<!KKaSMLL!4gdkkK^`e6fg 

 

 (. "	#. %%'(M$1$7$7$9 7 [&**WSE?EB!+t4#.-K( 7E%e^<%4%;1;E31GTU
4>uc4JuPT .LPUbl m!((#u677 !% 2 2 4 Qf"/ 
QJC**6:feN`N`;a((4uGWGW9X ++7E<L<LTX<X$+1>>()=fFVX[]b]h]h)i-44f>N5OP
QQ t-d-f-(= 	.$F$-FN	. % 	6Co%):3)?)D!#&7$:;%c*w/K/K.MM*W&9:%c*w/H/H.JJ*05$mC0$7Cc@R<SWX<X#4S#9!$Q$#?GCL.?.DQTT.JGHsN+#4S#9GCL05!	6" %&&(Gu

s   0AO!r  r  s   ` r/   check_model_inputsr    s%     4[q qf Nr1   c                   d    e Zd ZdZi Zd Zd Zd Zd Zd Z	d Z
eded	efd
       Zdee   fdZy)GeneralInterfacez
    Dict-like object keeping track of a class-wide mapping, as well as a local one. Allows to have library-wide
    modifications though the class mapping, as well as local modifications in a single file with the local mapping.
    c                     i | _         y r2   _local_mappingr  s    r/   r   zGeneralInterface.__init__c  s
     r1   c                 Z    || j                   v r| j                   |   S | j                  |   S r2   )r  _global_mappingr*   r  s     r/   r   zGeneralInterface.__getitem__f  s2    $%%%&&s++##C((r1   c                 >    | j                   j                  ||i       y r2   )r  r   )r*   r  r  s      r/   r   zGeneralInterface.__setitem__l  s    ""C<0r1   c                     | j                   |= y r2   r  r  s     r/   r   zGeneralInterface.__delitem__p  s    $r1   c                 H    t        i | j                  | j                        S r2   )r   r  r  r  s    r/   __iter__zGeneralInterface.__iter__s  s$    Ct++Ct/B/BCDDr1   c                 ~    t        | j                  j                         | j                  j                         z        S r2   )r   r  r   r  r  s    r/   __len__zGeneralInterface.__len__w  s0    4'',,.1D1D1I1I1KKLLr1   r  r  c                 >    | j                   j                  ||i       y r2   )r  r   )r   r  r  s      r/   registerzGeneralInterface.registerz  s    ""C<0r1   r   c                 4    t        | j                               S r2   )r   r   r  s    r/   
valid_keyszGeneralInterface.valid_keys~  s    DIIK  r1   N)r'   r3   r4   r5   r  r   r   r   r   r   r  r  rN   r   r  r   r  r6   r1   r/   r  r  Y  s`     O!)1%EM 13 1x 1 1!DI !r1   r  r2   rM  )F)ur5   r6  r  r  rQ  r  collectionsr   r   r   collections.abcr   r   
contextlibr   r	   dataclassesr
   r   r   enumr   	functoolsr   r   typingr   r   r   r   r   r   rL   	packagingr   utilsr   import_utilsr   r   r   r   r   r   r   r  
get_loggerr'   r  ro   model_debugging_utilsr    propertyr"   rG   rS   r_   rg   rk   rX   rr   rU   rw   ry   r}   r   r   rV   r   r   r   rW   r   rY   r   r   r   r   _pytree_torch_pytreer   r   r   r   r   r   r3   r   rN   r  r  r"  r)  r>  rE  rK  r  rU  rY  r]  rd  rg  rl  r5  r{  r~  r  r  r   r  r  r  r  r  r  r  r  r6   r1   r/   <module>r     s@     	   : : 4 0 7 7  $ E E        
		H	%Gh 05$9*0%'?'F&E$A$I&<#;"J4T3+ T3n //:k :eDIG^<^6_ : 99(9 
	9 w}}&()]W]]5-AA***!+E$/$:$:#;1[=Q=Q<R!S		
 	,++!+E	
	
3 	
l 
 
- -$,A,9N 9 9S 9 4  N*L*L*P*P$P$	c	eL8D> L^ %  D+T#s(^ + +
C D 89&7 9c 9# 9	3'8 	3s 	32 	:% %  %"zz&!~ &!r1   