
import importlib
from typing import TYPE_CHECKING, Any, Optional, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_optimum_quanto_available, is_torch_available, logging
from ..utils.quantization_config import QuantoConfig


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class QuantoHfQuantizer(HfQuantizer):
    """
    Quantizer for the quanto library
    """

    required_packages = ["quanto", "accelerate"]
    requires_parameters_quantization = True
    requires_calibration = False

    def __init__(self, quantization_config: QuantoConfig, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.post_init()

    def post_init(self):
        r"""
        Safety checker
        """
        if self.quantization_config.activations is not None and not self.pre_quantized:
            raise ValueError(
                "We don't support quantizing the activations with transformers library. "
                "Use quanto library for more complex use cases such as activations quantization, "
                "calibration and quantization aware training."
            )

    def validate_environment(self, *args, **kwargs):
        if not is_optimum_quanto_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)"
            )
        if not is_accelerate_available():
            raise ImportError(
                "Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`)"
            )

    def update_device_map(self, device_map):
        if device_map is None:
            device_map = {"": "cpu"}
            logger.info(
                "The device_map was not initialized. Setting device_map to {'':'cpu'}. "
                "If you want to use the model for inference, please set device_map='auto'"
            )
        return device_map

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            logger.info("You did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.")
            torch_dtype = torch.float32
        return torch_dtype

    def update_missing_keys(self, model, missing_keys: list[str], prefix: str) -> list[str]:
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        not_missing_keys = []
        for name, module in model.named_modules():
            if isinstance(module, QModuleMixin):
                for missing in missing_keys:
                    if (
                        (name in missing or name in f"{prefix}.{missing}")
                        and not missing.endswith(".weight")
                        and not missing.endswith(".bias")
                    ):
                        not_missing_keys.append(missing)
        return [k for k in missing_keys if k not in not_missing_keys]

    def check_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        state_dict: dict[str, Any],
        **kwargs,
    ) -> bool:
        """
        Check if a parameter needs to be quantized.
        """
        if is_optimum_quanto_available():
            from optimum.quanto import QModuleMixin

        device_map = kwargs.get("device_map", None)
        param_device = kwargs.get("param_device", None)
        # we don't quantize a parameter if its module is going to be offloaded to the cpu or disk
        if device_map is not None and param_device is not None:
            device_map_values = set(device_map.values())
            if param_device == "cpu" and len(device_map_values) > 1:
                if not (device_map_values == {"cpu"} or device_map_values == {"cpu", "disk"}):
                    return False

        module, tensor_name = get_module_from_name(model, param_name)
        # only the weights of quanto modules are quantized
        if isinstance(module, QModuleMixin) and "weight" in tensor_name:
            # if the weights are already quantized (frozen), there is nothing left to do
            return not module.frozen
        else:
            return False

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def create_quantized_param(
        self,
        model: "PreTrainedModel",
        param_value: "torch.Tensor",
        param_name: str,
        target_device: "torch.device",
        *args,
        **kwargs,
    ):
        """
        Create the quantized parameter by calling .freeze() after setting it to the module.
        """
        from accelerate.utils import set_module_tensor_to_device

        set_module_tensor_to_device(model, param_name, target_device, param_value)
        module, _ = get_module_from_name(model, param_name)
        module.freeze()
        module.weight.requires_grad = False

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        if version.parse(importlib.metadata.version("accelerate")) > version.parse("0.27.0"):
            from accelerate.utils import CustomDtype

            mapping = {
                "int8": torch.int8,
                "float8": CustomDtype.FP8,
                "int4": CustomDtype.INT4,
                "int2": CustomDtype.INT2,
            }
            target_dtype = mapping[self.quantization_config.weights]
            return target_dtype
        else:
            raise ValueError(
                "You are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute "
                "the appropriate device map, you should upgrade your `accelerate` library with "
                "`pip install --upgrade accelerate` or install it from source."
            )

    def _process_model_before_weight_loading(
        self, model: "PreTrainedModel", keep_in_fp32_modules: Optional[list[str]] = None, **kwargs
    ):
        from ..integrations import replace_with_quanto_layers

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model, _ = replace_with_quanto_layers(
            model, modules_to_not_convert=self.modules_to_not_convert, quantization_config=self.quantization_config
        )
        model.config.quantization_config = self.quantization_config

    def _process_model_after_weight_loading(self, model, **kwargs):
        return model

    @property
    def is_trainable(self) -> bool:
        return True

    def is_serializable(self, safe_serialization=None):
        return False
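

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's API): a minimal
# example of how this quantizer is driven indirectly through `from_pretrained`
# with a `QuantoConfig`. It assumes `optimum-quanto` and `accelerate` are
# installed; "facebook/opt-350m" is only a placeholder model id.
#
#     from transformers import AutoModelForCausalLM, QuantoConfig
#
#     quantization_config = QuantoConfig(weights="int8")
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-350m",
#         device_map="auto",
#         quantization_config=quantization_config,
#     )
# ---------------------------------------------------------------------------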