
from typing import TYPE_CHECKING, Optional, Union

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_torch_available, logging


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class BitNetHfQuantizer(HfQuantizer):
    """
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    """

    requires_parameters_quantization = False
    requires_calibration = True

    required_packages = ["accelerate"]

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)
        self.quantization_config = quantization_config

    def validate_environment(self, *args, **kwargs):
        if not is_accelerate_available():
            raise ImportError("Loading a BitNet quantized model requires accelerate (`pip install accelerate`)")

        if kwargs.get("from_tf", False) or kwargs.get("from_flax", False):
            raise ValueError(
                "Loading ternary weights from tf/flax is currently not supported, please make"
                " sure the weights are in PyTorch format."
            )

        if not torch.cuda.is_available():
            logger.warning_once(
                "You don't have a GPU available to load the model, the inference will be slow because of weight unpacking"
            )
            return

        device_map = kwargs.get("device_map", None)
        if device_map is None:
            logger.warning_once(
                "You have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a "
                "GPU device in order to run your model."
            )
        elif isinstance(device_map, dict) and ("cpu" in device_map.values() or "disk" in device_map.values()):
            raise ValueError(
                "You are attempting to load a BitNet model with a device_map that contains a CPU or disk device."
                " This is not supported. Please remove the CPU or disk device from the device_map."
            )

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        return model

    def _process_model_before_weight_loading(
        self,
        model: "PreTrainedModel",
        keep_in_fp32_modules: Optional[list[str]] = None,
        **kwargs,
    ):
        from ..integrations import replace_with_bitnet_linear

        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model, self.quantization_config.modules_to_not_convert, keep_in_fp32_modules
        )

        model = replace_with_bitnet_linear(
            model,
            modules_to_not_convert=self.modules_to_not_convert,
            quantization_config=self.quantization_config,
            pre_quantized=self.pre_quantized,
        )

    def adjust_max_memory(self, max_memory: dict[str, Union[int, str]]) -> dict[str, Union[int, str]]:
        max_memory = {key: val * 0.90 for key, val in max_memory.items()}
        return max_memory

    def adjust_target_dtype(self, target_dtype: "torch.dtype") -> "torch.dtype":
        target_dtype = torch.int8
        return target_dtype

    def is_serializable(self, safe_serialization=None):
        return True

    @property
    def is_trainable(self) -> bool:
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )

    @property
    def is_qat_trainable(self) -> bool:
        """Flag indicating whether the quantized model can carry out quantization aware training"""
        return (
            self.quantization_config.linear_class == "autobitlinear"
            and self.quantization_config.quantization_mode == "online"
        )
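
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of this module's API): the
# quantizer above is selected automatically by `from_pretrained` when a
# checkpoint's config carries a BitNet `quantization_config`; BitLinear layers
# are swapped in before the weights load. The checkpoint id below is an
# assumption used purely for illustration.
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "some-org/bitnet-1.58-2b",  # hypothetical BitNet-quantized checkpoint
#         device_map="cuda",          # a GPU is expected; see validate_environment above
#     )
# ---------------------------------------------------------------------------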