
    rh-                        d dl Z d dlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG i de%de)de+d e7d!e!d"e?d#eAd$e5d%e/d&e9d'e;d(e-d)e1d*eEd+e'd,eGd-eCe3e#e=d.ZHi dededed%ed ed!e	d"ed#ed$ed'ed(ed)ed&ed*ed+ed,ed-eee
ed.ZI ej                  eK      ZL G d/ d0      ZM G d1 d2      ZNd3eOfd4ZPd5eOfd6ZQy)7    N)OptionalUnion   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqr)fp8z
auto-roundmxfp4c                   6    e Zd ZdZedefd       Zed        Zy)AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc           	      v   |j                  d      }|j                  dd      s|j                  dd      r*|j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr,t        d| d	t        t        j                                      t        |   }|j                  |      S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrG   rI   suffix
target_clss        o/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/auto.pyrX   z AutoQuantizationConfig.from_dicty   s    /33NC#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??1, @/44678: 
 6lC
##$<==    c                     t        j                  |fi |}t        |dd       t        d| d      |j                  }| j                  |      } |j                  di | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrS   r_   rX   update)rY   pretrained_model_name_or_pathkwargsmodel_configrG   r_   s         r\   ra   z&AutoQuantizationConfig.from_pretrained   s    !112OZSYZ<!6=E;<Y;Z  [M  N  $0#C#C !mm,DE""",V,""r]   N)__name__
__module____qualname____doc__classmethoddictrX   ra   r`   r]   r\   rF   rF   s   s6    
 > > >( 
# 
#r]   rF   c                   z    e Zd ZdZedeeef   fd       Zed        Z	edeeef   de
e   fd       Zed        Zy)	AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    r_   c           	      D   t        |t              rt        j                  |      }|j                  }|t
        j                  k(  r|j                  r|dz  }n|dz  }|t        vr,t        d| dt        t        j                                      t        |   } ||fi |S )NrM   rL   rO   rP   )
isinstancerl   rF   rX   rI   r   rR   rJ   rV   rS   rU   rW   )rY   r_   re   rI   r[   s        r\   from_configzAutoHfQuantizer.from_config   s     )40"8"B"BCV"W*77 -<<<"//''551, @/44678: 
 ,L9
-888r]   c                 P    t        j                  |fi |}| j                  |      S )N)rF   ra   rq   )rY   rd   re   r_   s       r\   ra   zAutoHfQuantizer.from_pretrained   s*    4DDEbmflm233r]   quantization_config_from_argsc           	      .   |d}nd}t        |t              r;t        |t              rt        j                  |      }nt        j                  |      }t        |t
        t        t        t        t        t        f      rW|U|j                         }|j                         D ]  \  }}t        |||        |dt        |j                                dz  }|dk7  r't        |t              st        j                   |       |S t"        j%                  |       |S )z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        zYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rp   rl   r	   rX   rF   r   r
   r   r   r   get_loading_attributesitemssetattrrU   rW   warningswarnloggerinfo)rY   r_   rs   warning_msgloading_attr_dictattrvals          r\   merge_quantization_configsz*AutoHfQuantizer.merge_quantization_configs   s"    )4y 
 K)407I&5&?&?@S&T#&<&F&FGZ&[# #YJacno .9 !> T T V.446 8	c+T378 ?EVE[E[E]@^?_  `}  ~  ~K"Z0C[%QMM+& #" KK$""r]   c           	      ^   | j                  dd       }| j                  dd      s| j                  dd      r*| j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr8t
        j                  d| d	t        t        j                                d
       yy)NrI   rJ   FrK   rL   rM   rN   rO   rP   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rQ   r   rR   rS   rT   r{   warningrU   rV   rW   )rG   rI   rZ   s      r\   supports_quant_methodz%AutoHfQuantizer.supports_quant_method   s    /33NDI#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??NN1, @/44678 9ii
 r]   N)rg   rh   ri   rj   rk   r   r   rl   rq   ra   r   r   staticmethodr   r`   r]   r\   rn   rn      s    
 9e4KT4Q.R 9 90 4 4 *#"4)@#@A*# (00G'H*# *#X  r]   rn   methodc                       fd}|S )z-Register a custom quantization configuration.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rT   rS   
issubclassr   	TypeError)rY   r   s    r\   register_config_fnz8register_quantization_config.<locals>.register_config_fn  sH    55xx/CDEE#67HII36(0
r]   r`   )r   r   s   ` r\   register_quantization_configr     s     r]   namec                       fd}|S )zRegister a custom quantizer.c                 ~    t         v rt        d d      t        | t              st        d      | t         <   | S )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rV   rS   r   r   )rY   r   s    r\   register_quantizer_fnz1register_quantizer.<locals>.register_quantizer_fn  sG    )){4&0DEFF#{+@AA'*t$
r]   r`   )r   r   s   ` r\   register_quantizerr     s     ! r]   )Rry   typingr   r   models.auto.configuration_autor   utilsr   utils.quantization_configr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr    quantizer_awqr!   quantizer_bitnetr"   quantizer_bnb_4bitr#   quantizer_bnb_8bitr$   quantizer_compressed_tensorsr%   quantizer_eetqr&   quantizer_fbgemm_fp8r'   quantizer_finegrained_fp8r(   quantizer_fp_quantr)   quantizer_gptqr*   quantizer_higgsr+   quantizer_hqqr,   quantizer_mxfp4r-   quantizer_quantor.   quantizer_quarkr/   quantizer_spqrr0   quantizer_torchaor1   quantizer_vptqr2   rV   rT   
get_loggerrg   r{   rF   rn   strr   r   r`   r]   r\   <module>r      s    " 7      .  + 4 ' / 2 2 F + 6 @ 2 + - ) - / - + 1 +	<+ + O	
 O   " O  
> 6 & !   O!" O#$ %$) .$	9$+$ +$ J	$
 J$ J$ l$ [$ $ 
9$ 1$ /$ [$ }$ $  J!$" J#$$  !)$  . 
		H	%&# &#Rc cL  !S !r]   