
    rh                        d dl Z d dlmZ d dlZd dlmZ ddlmZmZ ddl	m
Z
mZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZmZ ddlmZ  e j6                  e      Z eej<                  dd      Z eej@                  jB                  jD                  ddejF                        Z$ej@                  jB                  Z!ej@                  jJ                  Z%ej@                  jL                  Z&ddZ'ddZ(y)    N)Any)mm_args   )configlowering)CppGemmTemplateCppWoqInt4GemmTemplate)create_epilogue_with_attr)expandregister_lowering)WeightInt4PackMatmul)autotune_select_algorithmExternKernelChoicerealize_inputs)use_aten_gemm_kernelsuse_cpp_gemm_template)Vzat::_weight_int8pack_mmF)has_out_variantz*at::native::_weight_int4pack_mm_cpu_tensor)r   kernel_creatorc                  Z   t        j                  t        j                  t        j
                  t        j                  g       t        j                  t        j                         t        j                  t        j
                         t        j                  t        j                         y N)r   add_needs_realized_inputs	quantized
max_pool2d
_quantized$wrapped_fbgemm_pack_gemm_matrix_fp16!wrapped_fbgemm_linear_fp16_weightmake_fallback     v/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/quantized_lowerings.pyregister_quantized_opsr"   '   sn    &&  ;;88	
 9//0:JJK:GGHr    c                      t        t        j                  d       d ddt        j                  dt        j                  dt        j                  dt
        dt
        f
d       } t        t        j                  d       d ddt        j                  dt        j                  d	t        d
t        j                  dt
        dt
        fd       }t        j                  t        j                         t        j                  t        j                         y )N)type_promotion_kind)layoutinputweightscaler%   returnc                   t        | |d      \  }}}}}|j                         t        j                  t        j                  t        j
                  fv r!|j                         t        j                  k(  sJ }t               rt        j                  ||f|      gng }dt        j                  dt        ffd}	t        |||d      rt        j                  ||||gd|	       t        d|||g|      S )	NT)r%   mat2_transposedbufr)   c           
      Z    t        | dt        t        j                                    S )Nmul)other)r
   r   r   size)r,   r%   r(   s    r!   _mul_epiloguez?register_woq_mm_ops.<locals>.int8pack_mm.<locals>._mul_epilogueO   s'    ,U.v{{1K"L r    )r+   )trans_wepilogue_creator_weight_int8pack_mm)r   	get_dtypetorchbfloat16float16floatint8r   aten__weight_int8pack_mmbindTensorr   r   r   add_choicesr   )
r&   r'   r(   r%   _mat1mat2aten_layoutchoicesr1   s
     ``      r!   int8pack_mmz(register_woq_mm_ops.<locals>.int8pack_mm5   s    '.6&$'
#1at NN LL EJJ.	
/ 
 %& &**D$+>LM 		u|| 	 	
 !dD$O''tU#!. )!7T4,?
 	
r    
qGroupSizeqScaleAndZerosc                L   t        | ||dd      \  }}}}}}|j                         t        j                  t        j                  t        j
                  fv r!|j                         t        j                  k(  sJ t        j                  j                  t        j                  |t        j                        d       }|}	t               rt        j                  ||||f|	      gng }
t        j                   st        j"                  rMt%        |	||dd|      r<|j'                         j)                         rt*        |   j-                  |
|	||||g       dt        j.                  j0                  j2                  dt        j4                  fd}|d	 d
}t7        d|
||||g|	|      S )NT)r%   use_4x2_dimr+   )dtype)name)r+   is_woq_int4q_group_sizexr)   c                     | j                         j                         sJ | j                         }| j                         }t	        j
                  dd|t        j                  |      S )Nr      )rI   device)
get_layoutis_contiguousget_size
get_devicer6   randintuint8)rM   shaperP   s      r!   get_example_weightzHregister_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.get_example_weight   sK    <<>//111JJLE\\^F==Cekk&QQr    c                 X    t         j                  j                  | j                            S r   )r   graph	constantsget_name)rM   s    r!   <lambda>z>register_woq_mm_ops.<locals>.int4pack_mm_cpu.<locals>.<lambda>   s    **1::<8 r    )r      _weight_int4pack_mm_for_cpu)input_gen_fns)r   r5   r6   r7   r8   r9   rV   r   rZ   add_tensor_constanttensorint64r   aten__weight_int4pack_mm_cpur<   r   max_autotunemax_autotune_gemmr   rQ   rR   r	   r>   	_inductorirIRNoder=   r   )r&   r'   rE   rF   r%   r?   r@   rA   
group_sizerB   rC   rX   r`   s                r!   int4pack_mm_cpuz,register_woq_mm_ops.<locals>.int4pack_mm_cpua   s    '.6&dD'
#1at NN LL EKK/	
0 WW00LL5;;7d 1 

  %&	 -114^<k  	   F$<$<% $ ' !//1":.::tZ8	R%//"4"4";"; 	R 	R "8

 ))4^4'
 	
r    )r   atenr4   r6   r=   r   r_   intr   r   _dyn_quant_matmul_4bit_dyn_quant_pack_4bit_weight)rD   rk   s     r!   register_woq_mm_opsrp   4   s    t//TJ )
||)
)
 ||)

 )
 
)
 K)
V t77TR C
||C
C
 C
 	C
 C
 
C
 SC
J 46674;;<r    )r)   N))loggingtypingr   r6    torch._inductor.kernel.mm_commonr    r   r   codegen.cpp_gemm_templater   r	   codegen.cpp_utilsr
   r   r   	mkldnn_irr   select_algorithmr   r   r   utilsr   r   virtualizedr   	getLogger__name__logr4   r;   opsr   int4mm_packed_weight_cpucreaterd   r   rl   r"   rp   r   r    r!   <module>r      s       4  N 8 / + 
 @  g!-	8%   2	II000'..	   II	YY!!
yy~~
It=r    