
import os
import re

from ..utils import is_compressed_tensors_available, is_torch_available, logging
from ..utils.quantization_config import CompressedTensorsConfig
from .base import HfQuantizer


if is_torch_available():
    import torch


logger = logging.get_logger(__name__)


class CompressedTensorsHfQuantizer(HfQuantizer):
    """
    Quantizer for the compressed_tensors package.  Loads and restores models to
    quantized state with compressed_tensors
    Tcompressed_tensorsquantization_configc                     t        |   |fi | t               st        d      |j	                          ddlm} |j                  |      | _        |j                  | _	        || _
        y )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   z%CompressedTensorsHfQuantizer.__init__'   sh    ,77.03  	%%'B)AABUV1@@#6     missing_keysprefixreturnc                     | j                   r|S | j                  j                  |      }|D cg c]  t        fd|D              r c}S c c}w )a}  
        Update missing keys after loading the model. This is necessary for compressed tensors
        to load the model correctly. We expect weights to be present in missing keys.
        The weight's are re-constructed by ModelCompressor in _process_model_after_weight_loading

        This function cleans up expected missing keys and returns the remaining missing keys
        c              3   P   K   | ]  }t        j                  d |         ywz.*Nrematch.0patternkeys     r   	<genexpr>zQCompressedTensorsHfQuantizer.update_missing_keys_after_loading.<locals>.<genexpr>M   s#     2vU\288b	NC3P2v   #&)r   r   get_missing_module_keysany)r   modelr   r   expected_missing_keysr*   s        `r   !update_missing_keys_after_loadingz>CompressedTensorsHfQuantizer.update_missing_keys_after_loading:   sV      !% G G N'
s2v`u2v/vC
 	
 
   A
Aunexpected_keysc                     | j                   r|S | j                  j                  |      }|D cg c]  t        fd|D              r c}S c c}w )z
        Override this method if you want to adjust the `unexpected_keys`.

        Args:
            unexpected_keys (`list[str]`, *optional*):
                The list of unexpected keys in the checkpoint compared to the state dict of the model
        """
        if self.run_compressed:
            return unexpected_keys

        # When the model is decompressed on load, compression metadata stored in the checkpoint
        # is expected and should not be reported as unexpected.
        keys_to_ignore = self.compressor.get_unexpected_file_keys(model)
        return [key for key in unexpected_keys if not any(re.match(f".*{pattern}", key) for pattern in keys_to_ignore)]

    def validate_environment(self, *args, **kwargs):
        if not is_compressed_tensors_available():
            raise ImportError(
                "Using `compressed_tensors` quantized models requires the compressed-tensors library: "
                "`pip install compressed-tensors`"
            )
        if not is_torch_available():
            raise ImportError("torch is required for using compressed-tensors quantization")

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("Loading model using torch.float16 for compressed-tensors quantization")
            torch_dtype = torch.float16
        elif torch_dtype != torch.float16:
            logger.info(
                "We suggest you to set `torch_dtype=torch.float16` for better efficiency with compressed_tensors."
            )
        return torch_dtype

    def _process_model_before_weight_loading(self, model, **kwargs):
        from compressed_tensors.quantization import apply_quantization_config

        ct_quantization_config = self.compressor.quantization_config

        if self.run_compressed:
            apply_quantization_config(model, ct_quantization_config, run_compressed=True)
        elif not self.quantization_config.is_quantization_compressed:
            apply_quantization_config(model, ct_quantization_config)

    def _process_model_after_weight_loading(self, model, **kwargs):
        """Decompress loaded model if necessary - needed for qat"""
        if (
            self.quantization_config.is_quantization_compressed and not self.run_compressed
        ) or self.quantization_config.is_sparsification_compressed:
            config = kwargs.get("config", None)
            cache_path = config._name_or_path

            if not os.path.exists(cache_path):
                from transformers.utils import cached_file

                # Resolve the local cache directory that holds the checkpoint files.
                config_file_path = cached_file(cache_path, "config.json")
                cache_path = os.path.sep.join(config_file_path.split(os.path.sep)[:-1])

            if self.quantization_config.is_quantization_compressed and not self.run_compressed:
                from compressed_tensors.quantization import QuantizationStatus

                self.compressor.quantization_config.quantization_status = QuantizationStatus.FROZEN

            self.compressor.decompress(model_path=cache_path, model=model)

    def update_tp_plan(self, config):
        additional_plan = {
            "layers.*.feed_forward.experts.*.gate_proj.weight": "local_colwise",
            "layers.*.feed_forward.experts.*.gate_proj.weight_scale": "local_colwise",
            "layers.*.feed_forward.experts.*.up_proj.weight": "local_colwise",
            "layers.*.feed_forward.experts.*.up_proj.weight_scale": "local_colwise",
            "layers.*.feed_forward.experts.*.down_proj.weight": "local_rowwise",
        }
        if config.get_text_config() is not None and config.get_text_config().base_model_tp_plan is not None:
            config.get_text_config().base_model_tp_plan.update(additional_plan)

        return config

    @property
    def is_trainable(self):
        return True

    @property
    def is_qat_trainable(self) -> bool:
        """Loaded Models can carry out quantization aware training"""
        return not self.run_compressed or not self.quantization_config.is_quantization_compressed

    def is_serializable(self, safe_serialization=None) -> bool:
        """Models quantized using compressed tensors can be saved to disk"""
        return True
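
# Usage sketch (illustrative, not part of the library module): this quantizer is not
# instantiated directly. It is selected automatically when a checkpoint's config.json
# carries a compressed-tensors `quantization_config` (quant_method "compressed-tensors").
# The checkpoint id below is hypothetical.
#
#   from transformers import AutoModelForCausalLM
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "some-org/llm-w8a8-compressed-tensors",  # hypothetical compressed-tensors checkpoint
#       device_map="auto",
#   )
#
# With run_compressed=False in the quantization config, weights are decompressed in
# _process_model_after_weight_loading; with run_compressed=True the model keeps its
# compressed modules and runs them directly.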