
    rh)                         d dl mZmZmZ ddlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ ddlmZ  e       rd d	lZ ej$                  e      Z G d
 de      Zy	)    )TYPE_CHECKINGAnyOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_fp_quant_availableis_qutlass_availableis_torch_availablelogging)QuantizationConfigMixinNc                       e Zd ZdZdZdZdZdgZdef fdZ	d Z
dd	Z	 dd
ddddedddeeef   deee      fdZ	 	 ddZddZdee   dedee   fdZedd
ed   fd       ZddZd
ddddedeeef   def
dZ xZS )FPQuantHfQuantizerz
    Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    FTfp_quantquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      }/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   zFPQuantHfQuantizer.__init__+   s    ,77#6     c                    t         j                  j                         st        d      t	               s!| j
                  j                  st        d      | j
                  j                  rt        j                  d       t               st        d      |t        d      t        |t              r0d|j                         v sd|j                         v rt        d      y y )	NzPFPQuant quantization is only supported on GPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr   r   pseudoquantizationImportErrorloggerwarningr   
ValueError
isinstancedictvalues)r   
device_mapr   s      r   validate_environmentz'FPQuantHfQuantizer.validate_environment/   s    zz&&(%b  $%d.F.F.Y.Y S  ##66NN ] %&ghhF  
D)u
8I8I8K/KvYcYjYjYlOld  Pm)r   returnc                     |'t         j                  d       t        j                  }|S |t        j                  k7  rt	        d| d      |S )NzV`torch_dtype` is None. Setting `torch_dtype=torch.bfloat16` for qutlass compatibility.zInvalid `torch_dtype` zC. fp_quant quantization only supports `torch_dtype=torch.bfloat16`.)r&   infor    bfloat16r(   )r   torch_dtypes     r   update_torch_dtypez%FPQuantHfQuantizer.update_torch_dtypeM   sV    KKpq..K  ENN*(5xy  r   modelr
   param_valueztorch.Tensor
param_nametarget_deviceztorch.device
state_dictunexpected_keysc                 8   t        ||      \  }}|j                  d      rDt        j                  j	                  |j                  |      d      |_        d |_        d |_        y |j                  d      rIt        j                  j	                  |j                  |            |_        d |_        d |_        d |_	        y t        j                  j	                  |j                  |            |_        |j                          |||v r|j                  |       y y y )Nz.qweightF)requires_gradz	.dqweight)r   endswithr    nn	Parametertoqweightweightdqweightscalespre_forwardremove)	r   r4   r5   r6   r7   r8   r9   module_s	            r   create_quantized_paramz)FPQuantHfQuantizer.create_quantized_paramX   s     )
;	 z*"XX//}-# 0 FN !FM"FO{+#hh001NOFO FM!FN FM **;>>-+HI&:+H"":. ,I&r   c                     ddl m} ddlm}  || || j                               | j                  |j
                  _        y )Nr   )replace_with_fp_quant_linearr	   )adapt_fp_quant_config)fp_quant_linear_config)r   rJ   integrations.fp_quantrK   r   config)r   r4   r   rJ   rK   s        r   $_process_model_before_weight_loadingz7FPQuantHfQuantizer._process_model_before_weight_loading   s7    
 	:A$#89Q9Q#R	
 ,0+C+C(r   c                     |S r    )r   r4   r   s      r   #_process_model_after_weight_loadingz6FPQuantHfQuantizer._process_model_after_weight_loading   s    r   missing_keysprefixc                    	 ddl m} |j                         D ch c]  \  }}t        ||      s| c}}	dt        dt
        f	fd}|D cg c]  } ||      r| c}S c c}}w c c}w )Nr   FPQuantLinearkeyr.   c                       j                  d      s j                  d      ry d  t         fdD              S )Nz.weightz.biasF.c              3   2   K   | ]  }|v xs |v   y wr   rQ   ).0namefull_keyrX   s     r   	<genexpr>zQFPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude.<locals>.<genexpr>   s"     R4ts{6dh&66Rs   )r<   any)rX   r^   fp_quant_namesrT   s   `@r   should_excludez>FPQuantHfQuantizer.update_missing_keys.<locals>.should_exclude   s>    ||I&#,,w*? 3%(HR>RRRr   )r   rW   named_modulesr)   strbool)
r   r4   rS   rT   rW   r]   rF   rb   rX   ra   s
      `     @r   update_missing_keysz&FPQuantHfQuantizer.update_missing_keys   sj    *383F3F3Hn<4JW]_lLm$n	S 	S 	S  ,G>#3FGG o Hs   A%A%A+A+c                      y)NFrQ   )r   r4   s     r   is_trainablezFPQuantHfQuantizer.is_trainable   s    r   c                      y)NTrQ   )r   safe_serializations     r   is_serializablez"FPQuantHfQuantizer.is_serializable   s    r   c                 P    ddl m} t        ||      \  }}t        ||      r|dv ryy)Nr   rV   )rA   r@   rB   TF)r   rW   r   r)   )	r   r4   r5   r6   r8   r   rW   rF   tensor_names	            r   check_quantized_paramz(FPQuantHfQuantizer.check_quantized_param   s0     	+25*Efm,@a1ar   )r2   torch.dtyper.   ro   r   )r4   r
   )__name__
__module____qualname____doc__requires_calibration requires_parameters_quantizationis_qat_trainablerequired_packagesr   r   r-   r3   rd   r*   r   r   listrH   rO   rR   rf   propertyrh   rk   re   rn   __classcell__)r   s   @r   r   r   !   s1    !'+$#7,C 7<	$ 04(/ (/ $(/ 	(/
 &(/ cN(/ "$s),(/TD DHtCy H# HRVWZR[ H (+<"=    $ 	
 cN 
r   r   )typingr   r   r   baser   quantizers_utilsr   modeling_utilsr
   utilsr   r   r   r   utils.quantization_configr   r    
get_loggerrp   r&   r   rQ   r   r   <module>r      sO    0 /  2 0 \ \ ? 			H	%V Vr   