
from typing import TYPE_CHECKING, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, is_vptq_available, logging
from ..utils.quantization_config import QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class VptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the VPTQ method. Enables the loading of prequantized models.
    """

    requires_calibration = True
    required_packages = ["vptq"]

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Using `vptq` quantization requires Accelerate: `pip install accelerate`")

        if not is_vptq_available():
            raise ImportError("Using `vptq` quantization requires VPTQ>=0.0.4: `pip install -U vptq`")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            if torch.cuda.is_available():
                torch_dtype = torch.float16
                logger.info(
                    "CUDA available. Assuming VPTQ inference on GPU and loading the model in `torch.float16`. "
                    "To overwrite it, set `torch_dtype` manually."
                )
            else:
                import vptq

                # VPTQ only exposes CPU kernels in releases that define `device_availability`.
                device_availability = getattr(vptq, "device_availability", lambda device: False)
                if device_availability("cpu") is True:
                    raise RuntimeError("No GPU found. Please wait for the next release of VPTQ to use CPU inference")
                torch_dtype = torch.float32
                logger.info("No GPU found. Assuming VPTQ inference on CPU and loading the model in `torch.float32`.")
        return torch_dtype

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        """
        There is no `modules_to_not_convert` parameter here to indicate which layers should not be quantized,
        because `quantization_config` already includes the layers that should be quantized.
        """
        from ..integrations import replace_with_vptq_linear

        # Collect the modules that must stay unquantized (e.g. those kept in fp32).
        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        # Swap the target linear layers for VPTQ linear layers before the checkpoint weights are loaded.
        replace_with_vptq_linear(
            model,
            quantization_config=self.quantization_config,
            modules_to_not_convert=self.modules_to_not_convert,
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    @property
    def is_trainable(self) -> bool:
        return False

    def is_serializable(self, safe_serialization=None):
        return True