
    rh                     <    d Z ddlmZ ddlmZ ddlmZ 	 	 	 	 ddZy)z9VPTQ (Vector Post-Training Quantization) integration file    N)init_empty_weights)VQuantLinearc                 F   |sdgn|}| j                         D ]w  \  }}|g }|j                  |       dj                  |      }|j                  }|j                  }	t        |t        j                        r||vr||	v s|d   |v r|	j                  |d      xs |j                  |d   d      }
t               5  |j                  }|j                  }t        |||
d   |
d   |
d   |
d   |
d	   |
d
   |
d   |
d   |
d   dd|j                  du      | j                  |<   d}| j                  |   j                  d       ddd       t!        t#        |j%                                     dkD  rt'        |||||      \  }}|j)                  d       z | |fS # 1 sw Y   YxY w)aw  
    Public method that recursively replaces the Linear layers of the given model with VPTQ quantized layers.
    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
    conversion has been successful or not.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`VptqConfig`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list[`str`]`, *optional*, defaults to `["lm_head"]`):
            Names of the modules to not convert in `VQuantLinear`. In practice we keep the `lm_head` in full precision
            for numerical stability reasons.
        current_key_name (`list`, *optional*):
            A list that contains the current key name. This is used for recursion and should not be passed by the user.
        has_been_replaced (`bool`, *optional*):
            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
            should not be passed by the user.
    lm_headN.vector_lensnum_centroidsnum_res_centroids	group_num
group_sizeoutlier_sizeindices_as_floatenable_normenable_permTF)r	   r
   r   r   r   r   r   r   r   is_indice_packedenable_proxy_errorbiasr   )quantization_configmodules_to_not_convertcurrent_key_namehas_been_replaced)named_childrenappendjoinshared_layer_configconfig_for_layers
isinstancennLineargetr   in_featuresout_featuresr   r   _modulesrequires_grad_lenlistchildrenreplace_with_vptq_linearpop)modelr   r   r   r   namemodule
layer_namer   r   layer_paramsr"   r#   _s                 q/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/integrations/vptq.pyr)   r)      s   6 1Gi[Lb,,. 2!f#!%XX./
1EE/AA vryy)"88 117G7KOb7b,00TB FYF]F] $dGL $% ;$00%22'3  ,] ;"."?&23F&G*;7+L9!-n!=%12D%E ,] ; ,] ;%)',D0(t$  %)! t$33E:/;0 tFOO%&'!+#;$7'=!1"3$ A  	R e2!f ###E; ;s   9BFF 	)NNNF)__doc__torch.nnr   
accelerater   vptqr   r)        r1   <module>r8      s'    <  ) 
 P$r7   