
    rh                         d dl mZmZ ddlmZ erddlmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ  e       rd d	lZ ej"                  e      Z G d
 de      Zy	)    )TYPE_CHECKINGOptional   )HfQuantizer   )PreTrainedModel)replace_with_spqr_linear)is_accelerate_availableis_spqr_availableis_torch_availablelogging)QuantizationConfigMixinNc                   z     e Zd ZdZdZdef fdZd ZddZ	 dddd	e	e
e      fd
ZddZed        ZddZ xZS )SpQRHfQuantizerzS
    Quantizer of the SpQR method. Enables the loading of prequantized models.
    Tquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_spqr.pyr   zSpQRHfQuantizer.__init__(   s    ,77#6     c                     t         j                  j                         st        d      t	               st        d      t               st        d      y )Nz,GPU is required to run SpQR quantized model.zGUsing `spqr` quantization requires Accelerate: `pip install accelerate`zFUsing `spqr` quantization requires SpQR: `pip install spqr_quant[gpu]`)torchcudais_availableRuntimeErrorr
   ImportErrorr   )r   argsr   s      r   validate_environmentz$SpQRHfQuantizer.validate_environment,   sG    zz&&(MNN&(ghh "fgg #r   c                     |'t         j                  }t        j                  d       |S |t         j                  k7  rt	        d      |S )NzHAssuming SpQR inference on GPU and loading the model in `torch.float16`.z|You cannot use any type other than torch.float16 for SpQR. Please either leave it None or set it totorch.float16 explicitly.)r   float16loggerinfo
ValueError)r   torch_dtypes     r   update_torch_dtypez"SpQRHfQuantizer.update_torch_dtype6   sN    --KKKbc  EMM),  r   modelr   keep_in_fp32_modulesc                     | j                  || j                  j                  |      | _        t        || j                  | j                         | j                  |j                  _        y )N)r   modules_to_not_convert)get_modules_to_not_convertr   r-   r	   config)r   r*   r+   r   s       r   $_process_model_before_weight_loadingz4SpQRHfQuantizer._process_model_before_weight_loadingA   s^     '+&E&E4++BBDX'
# 	! $ 8 8#'#>#>	

 ,0+C+C(r   c                     |S r    )r   r*   r   s      r   #_process_model_after_weight_loadingz3SpQRHfQuantizer._process_model_after_weight_loadingR   s    r   c                      y)NFr2   )r   s    r   is_trainablezSpQRHfQuantizer.is_trainableU   s    r   c                      y)NTr2   )r   safe_serializations     r   is_serializablezSpQRHfQuantizer.is_serializableY   s    r   )r(   torch.dtypereturnr9   r   )r*   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r"   r)   r   liststrr0   r3   propertyr5   r8   __classcell__)r   s   @r   r   r   !   sn      7,C 7h	 59D D 'tCy1D"  r   r   )typingr   r   baser   modeling_utilsr   integrationsr	   utilsr
   r   r   r   utils.quantization_configr   r   
get_loggerr;   r%   r   r2   r   r   <module>rK      sJ    +  0 3 [ [ ? 			H	%9k 9r   