
    rh                         d dl Z d dlmZ d dlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZmZ dd	lmZmZ  e       rd dlZ ej&                  e      Z G d
 de      Zy)    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_auto_gptq_availableis_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                   z     e Zd ZdZdZg dZdZdef fdZd Z	dd	Z
d
 ZddZddZedefd       ZddZ xZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    `auto_gptq` or `gptqmodel` package. Quantization is done under the hood for users if they load a non-prequantized model.
    F)optimum	auto_gptq	gptqmodelNquantization_configc                     t        |   |fi | t               st        d      ddlm} |j                  | j                  j                               | _	        y )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_gptq.pyr   zGptqHfQuantizer.__init__-   sM    ,77#%ghh.!.!8!89Q9Q9a9a9c!d    c                    t               st        d      t               rt               rt        j                  d       t               xrH t        j                  t        j                  j                  d            t        j                  d      kD  xs
 t               }|s)t        j                  j                         st        d      t               st               st        d      t               rSt        j                  t        j                  j                  d            t        j                  d      k  rt        d      t               rt        j                  t        j                  j                  d	            t        j                  d
      k  sHt        j                  t        j                  j                  d            t        j                  d      k  rt        d      y y )Nr   z4Detected gptqmodel and auto-gptq, will use gptqmodelz	auto-gptqz0.4.2z2GPU is required to quantize or run quantize model.z|Loading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) or auto-gptq (`pip install auto-gptq`) library. r   zYou need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq` or use gptqmodel by `pip install gptqmodel>=1.4.3`.r   z1.4.3r   1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r   r   r	   r
   loggerwarningr   parse	importlibmetadatatorchcudais_availableRuntimeError)r    argsr!   gptq_supports_cpus       r#   validate_environmentz$GptqHfQuantizer.validate_environment6   s   #%ghh!#(>(@NNQR #$ `i0088EFW^I__& $% 	 !)@)@)BSTT(*.D.F O  $%'--	8J8J8R8RS^8_*`cjcpcpd
 +
  ^  $%MM),,44[ABW]]SZE[[}}Y//77	BCgmmT]F^^jkk _ &r$   returnc                     |'t         j                  }t        j                  d       |S |t         j                  k7  rt        j                  d       |S )NzRLoading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with GPTQ.)r,   float16r'   info)r    torch_dtypes     r#   update_torch_dtypez"GptqHfQuantizer.update_torch_dtypeR   sG    --KKKlm  EMM)KKlmr$   c                     |dt        j                  d      i}t               s"|ddt        j                  d      ifv r|ddik(   |S )N cpur   )r,   devicer
   )r    
device_maps     r#   update_device_mapz!GptqHfQuantizer.update_device_mapZ   sN    ell512J%'J52u||TYGZB[:\,\2q'!r$   c                 h   |j                   j                  dk7  rt        d      | j                  rt	        j
                  t        j                  j	                  d            t	        j
                  d      k  r| j                  j                  |      }y  | j                  j                  |fi |}y y )N	input_idsz%We can only quantize pure text model.r   r&   )
r"   main_input_namer/   pre_quantizedr   r)   r*   r+   r   convert_modelr    modelr!   s      r#   $_process_model_before_weight_loadingz4GptqHfQuantizer._process_model_before_weight_loadingb   s    ??**k9FGG}}Y//77	BCw}}U^G__..<<UC<..<<UMfM r$   c                    | j                   r| j                  j                  |      }y | j                  j                  |j
                  | j                  _        | j                  j                  || j                  j                         t        j                  | j                  j                               |j                  _        y N)rB   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrD   s      r#   #_process_model_after_weight_loadingz3GptqHfQuantizer._process_model_after_weight_loadingm   s    **::5AE''1195:5G5G((2""11%9Q9Q9[9[\/9/C/CDDZDZDbDbDd/eELL,r$   c                      yNT )r    s    r#   is_trainablezGptqHfQuantizer.is_trainablew   s    r$   c                      yrQ   rR   )r    safe_serializations     r#   is_serializablezGptqHfQuantizer.is_serializable{   s    r$   )r7   torch.dtyper3   rW   )rE   r   rH   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r   r2   r8   r>   rF   rO   propertyboolrS   rV   __classcell__)r"   s   @r#   r   r   #   sg    
 !=e,C el8	Nf d  r$   r   )r*   typingr   	packagingr   baser   modeling_utilsr   utilsr	   r
   r   r   r   utils.quantization_configr   r   r,   
get_loggerrX   r'   r   rR   r$   r#   <module>rh      sO         0 u u K 			H	%Yk Yr$   