
    rh2                         d dl mZmZ ddlmZ ddlmZmZmZm	Z	 ddl
mZ ddlmZ erddlmZ  e       rd d	lmZ  e       rd d
lZ e	j&                  e      Zd Z G d de      Zy
)    )TYPE_CHECKINGAny   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 ^    |j                  d      d d }| }|D ]  }|j                  |   } |S )N.)split_modules)modelnamemodule_treeparentms        x/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parentr   %   s=    **S/#2&KF $#$M    c                       e Zd ZdZdZdZdZdgZ fdZd Z	ddd	e
e   d
ede
e   fdZddde
e   de
e   de
e   fdZdddddedeeef   def
dZdddddedddeeef   de
e   fdZd Z	 	 ddZddZddZedefd       Z xZS ) HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FThqqc                 B    t        |   |fi | d | _        d| _        y )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__s      r   r"   zHqqHfQuantizer.__init__9   s&    ,77$r   c                    t               st        d      |j                  dd      s|j                  dd      rt        d      | j                  9d|v r|d   | _        n*t
        j                  | _        t        j                  d       |j                  d      }t        |t              rZd	|j                         v sd
|j                         v rt        d      t        t        |j                                     dkD  | _        y y )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.r#   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r   ImportErrorget
ValueErrorr#   torchfloat32loggerinfo
isinstancedictvalueslensetr$   )r%   argsr'   r,   s       r   validate_environmentz#HqqHfQuantizer.validate_environment>   s     " T  ::i'6::k5+I; 
 #&#)-#8 #(== stZZ-
j$'
))++v9J9J9L/L h 
 (+3z/@/@/B+C'Dq'H$ (r   r   r   missing_keysprefixreturnc                 R    | j                   r|D cg c]	  }d|vs| c}S |S c c}w )Nweight)pre_quantized)r%   r   r=   r>   r'   keys         r   update_missing_keysz"HqqHfQuantizer.update_missing_keys[   s1     #/ICHC4GCII Js   	$$expected_keysloaded_keysc                    | j                   s|S fdt        |      }t               rOddlm} |j                         D ]  \  }}||_         t               } ||       t               }	|D ]6  }
|j                  j                  d   D ]  }||
v s|	j                  |
        8 ||	z  } |d d t        j                  dd      j                         dhz
  }t               }|D ](  t        fd	|D              s|j                         * ||z  }|D ]_  }
|
d
z   |v r|j                  |
d
z          n%|j                  |D ch c]
  }|
dz   |z    c}       |
dz   |v sL|j                  |
dz          a t        |      S c c}w )Nc                     | j                         D ]M  \  }}t        |t        j                  j                        r|j                  |j                          ||       O y N)named_childrenr6   r2   nnLinearaddr   )r   layersr   module_find_hqq_quantizable_layerss       r   rP   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersk   sK     % 4 4 6 =ffuxx8JJv{{+,VV<=r   r   	HQQLinearskip_modulesr-   Flinear_layerquant_configcompute_dtypedevicedel_origbiasc              3   &   K   | ]  }|v  
 y wrI    ).0_modulerC   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s     D'w#~Ds   z.weightr   z.bias)rB   r:   r   hqq.core.quantizerR   named_modulesr   configr&   rM   r2   float16state_dict_keysanyupdatelist)r%   r   rE   rF   new_keysrR   r   rO   _valid_modules_skipped_modulesr^   _skip_module	_ref_keys_rm_keys_ref_keyrP   rC   s                  @@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysd   s    !!  	= }%3 !& 3 3 5 #f"# !UN(?  #u) 6$)LL$D$D^$T 6L#w.(,,W566 ..N "!!#mm o6(+I uH &D^DDLL%&  H * 4Y&+5LL9!45OOi$X(Ws]X%=$XYW$3LL7!234 H~	 %Ys   F
param_valueztorch.Tensor
param_name
state_dictc                 2   t               rddlm} t        ||      \  }}| j                  r-t        |t        j                  j                  f      xr |dk7  S t        |t        j                  j                        xr |dk(  xs t        |      xr |dk(  S )Nr   rQ   rA   rZ   )	r   r`   rR   r   rB   r6   r2   rK   rL   )	r%   r   rp   rq   rr   r'   rR   rO   tensor_names	            r   check_quantized_paramz$HqqHfQuantizer.check_quantized_param   s     325*Ev'CDa+YaJaa 6588??3 ,8+M vy1KkV6Kr   target_deviceztorch.deviceunexpected_keysc                    t               rddlm} t        d|fd       }||_        t        ||      \  }	}
dj                  |j                  d      dd       }t        ||      }|j                  d      d   }|
dk(  ryi }|j                         D ]=  \  }}|dz   |v s|||j                  d      d   <   |(||v s-|j                  |       ? | j                  rt        |	      ry |dd| j                  |d	
      }|j                  |       |j                  Rt        |j                  t         j"                        r.t         j$                  j'                  |j                        |_        | j(                  r| j+                  |      }t-        |||       |	`~	t         j0                  j3                          y|j                         D ]/  \  }}t-        |	|t         j$                  j'                  |             1 |j4                  j6                  d   }|j4                  j6                  d   }dj                  |	j8                  j                  d      dd       }d}d|v r|}n	||v r||   }|D ]  }||	j8                  v sd} n | |	|| j                  |d      }|j                  Rt        |j                  t         j"                        r.t         j$                  j'                  |j                        |_        | j(                  r| j+                  |      }t-        |||       n*|	j;                  | j                  |      }	t-        |||	       t         j0                  j3                          y)a  
        Each nn.Linear layer is processed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rQ   _selfc                 Z    t        j                  d| j                  | j                        S )Nr   dtyperX   )r2   emptyrW   rX   )ry   s    r   rA   z5HqqHfQuantizer.create_quantized_param.<locals>.weight   s    {{1E,?,?UUr   r   Nr   rZ   FrT   rV   rS   weight_quant_paramsT)rV   rW   rX   rY   r{   )r   r`   rR   propertyrA   r   joinr   r   itemsremoverB   r6   r#   load_state_dictrZ   r2   TensorrK   	Parameterr$   _patch_layer_for_multigpusetattr__dict__cudaempty_cacherb   r&   r   to)r%   r   rp   rq   rv   rr   rw   rR   rA   rO   rt   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerrC   tensorrV   rS   
module_tagmodule_quant_configskip_modules                            r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   s?    3
 Vi V V  &I25*EXXj..s3CR89
#E:6$R(&  $$& 	.DAqC1$67!!''#,r"23".13G#**1-		. &),%!%!%"&"2"2("	 %%&78~~)j.V!&!3!3INN!C	## ::9E	M43 JJ""$ -224 	=KCFC!3!3F!;<	=
 ||77G||77GXXfkk//4RS9:
" L0".<'".z":' 	Kfkk)&*#	
 *!0"..$I ~~)j.V!&!3!3INN!C	## ::9E	M43 YYT%5%5mYLFM40

 r   c                 <    t              d fd_        S )Nc                     t        j                  |j                  | j                        | j	                         j                               }| j                  || j                  z  }|S rI   )r2   matmulr   rX   
dequantizetrZ   )r%   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device.  sL    ,,qttDKK0$//2C2E2E2GHCyy$tyy Jr   c                      |       S rI   r\   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>4  s    &9)Q&G r   )r   forward)r%   r   r   s    `@r   r   z(HqqHfQuantizer._patch_layer_for_multigpu+  s#    +I6		 H	r   c                 2    t        || j                        }y )N)r&   )r   r&   r%   r   r'   s      r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading7  s     'u$BZBZ[r   c                 >    d|_         | j                         |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializabler   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading@  s     !%$($8$8$:!r   c                      yr   r\   )r%   safe_serializations     r   r   zHqqHfQuantizer.is_serializableE  s    r   c                      yr   r\   )r%   s    r   is_trainablezHqqHfQuantizer.is_trainableH  s    r   )r   r   rI   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr"   r<   rg   strrD   ro   r7   r   boolru   r   r   r   r   r   r   r   __classcell__)r(   s   @r   r   r   -   sX     %'+$ %
I: & 6:3i IL 	c ;&;7;Cy;OSTWy;	c;z  $ 	
 cN 
.p! p! $p! 	p!
 &p! cNp! cp!f
\ \
 d  r   r   )typingr   r   integrationsr   utilsr   r   r	   r
   baser   quantizers_utilsr   modeling_utilsr   accelerate.hooksr   r2   
get_loggerr   r4   r   r   r\   r   r   <module>r      sZ    & 1 Z Z  2 0 8			H	%][ ]r   