
    rh                     p   d dl mZ d dlmZmZ d dlZd dlZd dlmZ d dl	m
Z
 ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZ dd	lmZ 	 	 	 	 d4d
ddddddee   dee   dee   dede deee      dee!d      ded   fdZ"	 	 	 d5d
ddddddee!d      ded   de fdZ#d Z$ G d de      Z% G d de      Z& G d de      Z' G d  d!e      Z( G d" d#e      Z) G d$ d%e      Z* G d& d'e      Z+ G d( d)e      Z, G d* d+e      Z- G d, d-e      Z. G d. d/e      Z/ G d0 d1e      Z0 G d2 d3e      Z1y)6    )Sequence)AnyOptionalN)make_channels_last_strides_for
OrderedSet   )ExternKernelAllocFixedLayoutFlexibleLayoutget_device_typeir_node_to_tensor is_contiguous_storage_and_layoutLayoutmay_convert_to_optionalMultiOutputMultiOutputLayoutMutationOutput
NoneLayout	TensorBox)convert_shape_to_inductorpad_listlikeSUPPORTED_MKLDNN_DEVICES)Vxr   weightbiaspaddingstridedilationgroups
transposedoutput_paddingquantize_argsotherc                 r   d }dd}d }|j                          |j                          ||j                          t        j                  j                  5  t	        |d      }t	        |d      }t        |j                               dz
  }dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J dt        |      cxk  r|k  sJ  J t        ||      }t        ||      }t        ||      }|	t        dg|      }	n%dt        |	      cxk  r|k  sJ  J t        |	|      }	t        |t        t        j                  j                  j                  f      sJ |r( |||      }|j                         } |||||	|||      }n|t        |j                        }t        |j                        }t        |      t        |      k7  r/t        |      d	k(  rt        |      d
k(  sJ |j!                  d        ||||||      }dgt        t#        t%        dt        |      dz                     z   }t        |      g|z   }ddd       | j'                  |      }t)        d D               }|st+        |      dk(  r!t-        |      rt/        j0                  |      }nEt+        |      dk(  r,|j3                         d   dk(  rt/        j0                  |      }nt5        |      }t+        |      t+        |      k(  sJ t+        |      t6        v sJ |g}|
X|
\  }}}}|j                          |j                          |j                          |j                          |||gz   |gz   ||gz   }n||gz  }|*| j'                  ||      }t        |t8              sJ ||gz  }t;        |j=                         |j?                         tA        |      tA        |            }||||g} |r| jC                  d|	       ||jE                  |       n| jC                  d|       || |||fS # 1 sw Y   xY w)a}  
    This function is a helper function to prepare inputs, layout and constant args
    for convolution post-op fusion's create function, including deciding the output
    layout (channels first or channels last), realizing inputs and make them etc. The
    function only supports the CPU/XPU device since conv post-op fusion kernel is only
    supported on CPU/XPU right now.
    c                    t        |       t        |      k(  sJ d       t        |       }|dkD  sJ d       d}d}	g }
|
j                  | |          |
j                  ||	   |z         t        d|      D ]P  }||   dz
  ||dz
     z  dz   }| |   dz
  ||dz
     z  ||dz
     dz  z
  |z   ||dz
     z   }|
j                  |       R t        t	        t
        |
            S )NzExpect input dim == weight dim   zExpect input dim > 2r   r	   )lenappendrangelistmapint)output_sizeweight_sizer   r#   r   r    r!   dim	BATCH_DIMWEIGHT_INPUT_CHANNELS_DIM
input_sizedkernelinput_size_ds                 l/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/mkldnn_ir.py_conv_input_sizez<_prepare_convolution_fusion_create.<locals>._conv_input_size5   s!    ;3{#33U5UU3+Qw...w	$%!
+i01+&?@6IJq# 	,A!!nq(HQUO;a?FQ!#va!e}41q5>A%' !Q'(  l+	, CZ())    Nc                 0   |d u}t        |       }g }|j                  | d          |j                  |d          t        d|      D ]M  }|r||dz
     nd}	|	||   dz
  z  dz   }
| |   d||dz
     z  z   |
z
  ||dz
     z  dz   }|j                  |       O |S )Nr   r(   r	   )r)   r*   r+   )r4   r0   r   r   r    has_dilationr1   r/   r5   	dilation_r6   output_size_ds               r8   _conv_output_sizez=_prepare_convolution_fusion_create.<locals>._conv_output_sizeM   s    t+*o:a=);q>*q# 	.A+7QQI+a.1"459F']a'!a%..@AFJvAP M }-	. r:   c                 L   | j                         t              }|dkD  sJ d       |dkD  rVg }|j                  d   |z         |j                  d   |z         |j                  fdt	        d|      D               |S | j                  dd      j                         }|S )Nr(   zExpect weight dim > 2r	   r   c              3   (   K   | ]	  }|     y wN ).0r5   prepacked_weight_sizes     r8   	<genexpr>z[_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size.<locals>.<genexpr>k   s     OA4Q7Os   )sizer)   r*   extendr+   	transpose)prepacked_weightr!   r1   r0   rE   s       @r8   _original_deconv_weight_sizezH_prepare_convolution_fusion_create.<locals>._original_deconv_weight_size`   s     !1 5 5 7'(Qw///wA:K4Q7&@A4Q7&@AOq#OO  +44Q:??AKr:   T)guard_shaper(   r         r	   c              3   <   K   | ]  }t        |t                y wrB   )
isinstancer.   )rD   is     r8   rF   z5_prepare_convolution_fusion_create.<locals>.<genexpr>   s     GAZ3/Gs   xpurB   )#realizer   graph	fake_moder   r)   rG   r   rP   r.   sympycorenumbersIntegerr,   shapepopreversedr+   require_stride_orderallr   r   r   contiguous_strides
get_strider   r   r   r   get_device_or_error	get_dtyper   insertr*   )!clsr   r   r   r   r   r    r!   r"   r#   r$   r%   r9   r?   rK   x_fakeweight_fakedimsr0   r4   r/   x_shapeweight_shapereq_stride_orderdynamic_shapesoutput_strideinputsx_scalex_zero_pointw_scalew_zero_pointkernel_layoutconstant_argss!                                    r8   "_prepare_convolution_fusion_creatert      s=   .*0&  IIK
NN	
		 /F"1$7'DA6;;=!A%3w<'4'''''3x=(D(((((3v;&$&&&&&w-$/fd+!)1#t4Ns>*2d22222).$?N&3

(:(:(B(B"CDDD 7{FKKJ*K 6<<(G 1 12L7|s<007|q(S->!-CCC  #+K 3huQFa/H&I!JJ 0125EE_/Fb 	  $45A G+GGGN/!,5
*1
-&99+F 
	u	$):a)?&99+F6{C1!88881!9999SF 7D4w7L11VH<?VV6(((0@A%+++5'		!+.!-0	M fh7MQ/dQ%=-1A5HHW/F /Fs   G4P,,P6
binary_sumc           
         |j                          |j                          ||j                          |j                         ^ }}|j                         \  }}	t        |      |	gz   }
t        t        t	        t        |j                                                 }| j                  ||      }t        |      t        |      k(  sJ t        |      t        v sJ |g}|X|\  }}}}|j                          |j                          |j                          |j                          |||gz   |gz   ||gz   }n||gz  }||r| j                  ||      }||gz   }t        j                  |
      }t        |j                         |j                         |
|      }g }||j                  |       n|j                  d|       |||||fS )z
    This function is a helper function to prepare inputs, layout and constant args
    for linear post-op fusion's create function. The function only supports the CPU device
    since linear post-op fusion kernel is only supported on CPU right now.
    r   )rS   get_sizer,   r\   r+   r)   r]   r   r   r   r_   r   
get_devicerb   r*   rc   )rd   r   r   r   r$   r%   ru   m_ocr/   rj   rm   rn   ro   rp   rq   rl   rr   rs   s                       r8   _prepare_linear_fusion_creater|      s    IIK
NNJJLEQ OOEArq'RD.KHU3qzz|+<%=>?  $45A1!88881!9999SF 7D4w7L11VH<?VV6(,,U4DEE5'!"55kBM			M  "MdQ%=-1A5HHr:   c                     t        | j                         | g       }t        | j                               | _        |g| _        |S )Ndevice)r   
get_layoutr   rx   layoutoutputs)packed	output_irs     r8   _create_output_noder   !  sD    
I
 &V->->-@AFM[FNr:   c                        e Zd Z	 d	 d fdZ fdZedddddddee   dee   d	ee   d
edeee	      fd       Z
 xZS )ConvolutionUnaryc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       y )Nr   aoti_torch__mkldnn__convolution_pointwiseop_overloadcpp_kernel_name)	r   device_typesuper__init__torchopsmkldnn_convolution_pointwisedefaultselfr   rm   rs   	__class__s       r8   r   zConvolutionUnary.__init__-  sa     +6!95		((??GG)$*:*:);;YZ 	 	
r:   c                 b    |j                  d| j                   d       t        |   |       y Nz&torch/csrc/inductor/aoti_torch/c/shim_z.hinclude_extra_headerr   r   codegenr   wrapperr   s     r8   r   zConvolutionUnary.codegen=  2    $$4T5E5E4FbI	
 	 r:   r   r   r   r   padding_stride_r=   r!   scalarsc           
          t        | |||||||      \  }}}}}||t        |	      |
gz   }t        |||      }t        |      S )Nr   rm   rs   )rt   r   r   r   )rd   r   r   r   r   r   r=   r!   attrr   	algorithmrm   rs   rr   rz   r   s                   r8   createzConvolutionUnary.createC  sr    ( /FD(GY
	
 &#G,)
 

 " '

 #6**r:   rC   returnN__name__
__module____qualname__r   r   classmethodr,   r.   r   r   r   __classcell__r   s   @r8   r   r   ,  s    
 	

 

 !  + +  + 	 +
 s) + c + 9 +  + $s)$ +  +r:   r   c                        e Zd Z	 	 d	 d fdZ fdZedddddddddee   d	ee   d
ee   dedede	e
   de	e   de	ee      de	e   fd       Z xZS )ConvolutionBinaryc           
          t        |d         | _        t        |   |||d t        j
                  j                  j                  j                  d| j                   d       || _	        y )Nr   r   %_mkldnn__convolution_pointwise_binaryr   )
r   r   r   r   r   r   r   r   binarycpp_constant_args)r   r   rm   rs   r   r   s        r8   r   zConvolutionBinary.__init__h  sk     +6!95		((??FF)$*:*:);;`a 	 	
 "3r:   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r8   r   zConvolutionBinary.codegenz  r   r:   r   r   r%   r   r   r   r   r=   r!   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmc           
          t        | |||||||      \  }}}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        |||      }t        |      S )Nr	   r   )rt   r]   rc   r   r   r   )rd   r   r%   r   r   r   r   r=   r!   r   r   r   r   r   rm   rs   rr   rj   rz   r   s                       r8   r   zConvolutionBinary.create  s    . /FD(GY
	
 ((0@Aa%#M2)
 
 # '

 #6**r:   )rC   rC   r   )r   r   r   r   r   r   r,   r.   strr   floatr   r   r   r   s   @r8   r   r   g  s    
 3 
3$! '+'+ '+ 	'+
 '+ s)'+ c'+ 9'+ '+ '+ uo'+ SM'+  S	*'+ "#'+ '+r:   r   c                        e Zd Z	 d	 d fdZ fdZdeej                     fdZe	ddddddd	dd
e
e   de
e   de
e   dededee   dee   dee
e      dee   fd       Z xZS )ConvolutionBinaryInplacer   c           
         t        |d         | _        |d   |d   g|dd  z   }t        |   |||d t        j
                  j                  j                  j                  d| j                   d       t        t        |d   j                               |d   |       t        t        |d   j                               |d   |       g| _        y )Nr   r	   r(   r   &_mkldnn__convolution_pointwise_binary_r   r~   )r   r   r   r   r   r   r   _convolution_pointwise_r   r   r   rx   mutation_outputs)r   rr   rm   rs   reordered_inputsr   s        r8   r   z!ConvolutionBinaryInplace.__init__  s     +6!95"1Ivay1F12J>		((@@GG)$*:*:);;ab 	 	
 :VAY-A-A-CDfQiQUV:VAY-A-A-CDfQiQUV!
r:   c                 b    |j                  d| j                   d       t        |   |       y r   r   r   s     r8   r   z ConvolutionBinaryInplace.codegen  r   r:   c                     t               S rB   r   r   s    r8   get_unbacked_symbol_defsz1ConvolutionBinaryInplace.get_unbacked_symbol_defs  
    |r:   r   r   r%   r   r   r   r   r=   r!   r   r   r   r   r   c           
         t        | |||||||      \  }}}}}| j                  ||      }|j                  d|       ||	|
|t        |      |gz   }t	        t        |d   j                               ||      }|j                  d   S )Nr	   r~   )rr   rm   rs   r   )rt   r]   rc   r   r   r   rx   rm   )rd   r   r%   r   r   r   r   r=   r!   r   r   r   r   r   rm   rs   rz   rj   r   s                      r8   r   zConvolutionBinaryInplace.create  s    . /FD(GY
	
 ((0@Aa%#M2)
 
 *$F1I,@,@,BC'
 }}Qr:   r   r   )r   r   r   r   r   r   rV   Symbolr   r   r,   r.   r   r   r   r   r   r   r   s   @r8   r   r     s    
 	

 

0!*U\\*B  * *  *  	* 
 *  s)*  c*  9*  *  *  uo*  SM*   S	**  "#*  * r:   r   c                        e Zd Z	 d	 d fdZ fdZedddddddee   dee   d	ee   d
ee   dedeee	      fd       Z
 xZS )ConvolutionTransposeUnaryc                     t         |   |||d t        j                  j                  j
                  j                  d       y )N6aoti_torch_cpu_mkldnn__convolution_transpose_pointwiser   )r   r   r   r   r    _convolution_transpose_pointwiser   r   s       r8   r   z"ConvolutionTransposeUnary.__init__  s?     			((IIQQT 	 	
r:   c                 F    |j                  d       t        | 	  |       y Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hr   r   r   r   s     r8   r   z!ConvolutionTransposeUnary.codegen      $$%RS r:   r   r   r   r   r   output_padding_r   r=   groups_r   c                     d}t        | |||||||||
      \  }}}}}||	t        |
      |gz   }t        |||      }t        |      S )NTr   )rt   r   r   r   )rd   r   r   r   r   r   r   r=   r   r   r   r   r"   rm   rs   rr   rz   r   s                     r8   r   z ConvolutionTransposeUnary.create  s     
 /
	
 &#G,)
 

 + '

 #6**r:   r   r   r   r   s   @r8   r   r     s    
 	

 

! ++++ ++ 	++
 s)++ c++ c++ 9++ ++ $s)$++ ++r:   r   c                        e Zd Z	 d	 d fdZ fdZeddddddddddd	dd
ddee   dee   dee   dededefd       Z	 xZ
S )QConvPointWisePT2Ec                     t        |      dk(  | _        t        |   |||dt        j
                  j                  j                  j                  d       y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, stride, padding, dilation, groups, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
           N&aoti_torch_cpu__qconv_pointwise_tensorr   )	r)   has_biasr   r   r   r   onednnqconv_pointwiser   r   s       r8   r   zQConvPointWisePT2E.__init__?  sO      Fq(		((88@@D 	 	
r:   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r   r   r   r   rP   r   r   codegen_size_assertsr   s     r8   r   zQConvPointWisePT2E.codegenY  ?    $$%RS dkk6*%%g. +r:   qxr   rn   ro   qwrp   rq   r   r   r   r    r!   output_scaleoutput_zero_pointc                 0   d}d }t        | ||||	||
|||||||g      \  }}}}}||d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |||||t        |      |gz   }|J |t        j                  t        j                  fv r||_        t        |||      S )NFr(   r	   r   r   )rt   r   r   float32bfloat16dtyper   )rd   r   rn   ro   r   rp   rq   r   r   r   r    r!   r   r   output_dtyper   r   r   r"   r#   rm   rs   rr   rz   s                           r8   r   zQConvPointWisePT2E.create_  s   * 
 /lG\:
	
 <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#G,)
 
 '''EMM5>>:: #/M! '
 	
r:   r   r   )r   r   r   r   r   r   r,   r.   r   r   r   r   s   @r8   r   r   >  s    
 	

 

4/ B
B
 B
 "	B

 B
 B
 "B
 B
 S	B
 cB
 s)B
 B
 B
 B
 B
r:   r   c                        e Zd Z	 d	 d fdZ fdZdee   fdZdee	j                     fdZeddddd	dd
ddddddee   dee   dee   deddddfd       Z xZS )QConvPointWiseBinaryPT2Er   c                     t        |      dk(  | _        d| _        t        |   |||dt
        j                  j                  j                  j                  d       y)ag  
        Needs input/weight/output qparams
        if bias is not None
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum, b]
            - const_args = [stride, padding, dilation, groups, o_scale, o_zp,
            output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, x_scale, x_zp, w,  w_scale, w_zp, accum]
            - const_args [b, stride, padding, dilation, groups, o_scale, o_zp,
             output_dtype, accum_scale, accum_zp, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
              N/aoti_torch_cpu__qconv2d_pointwise_binary_tensorr   )
r)   r   idx_for_inplace_sumr   r   r   r   r   qconv2d_pointwiser   r   s       r8   r   z!QConvPointWiseBinaryPT2E.__init__  sW    " Fq(#$ 		((::AAN 	 	
r:   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r   r   r   s     r8   r   z QConvPointWiseBinaryPT2E.codegen  r   r:   c                 R    | j                   | j                     j                         gS rB   )rm   r   get_namer   s    r8   get_mutation_namesz+QConvPointWiseBinaryPT2E.get_mutation_names  s#    D445>>@AAr:   c                     t               S rB   r   r   s    r8   r   z1QConvPointWiseBinaryPT2E.get_unbacked_symbol_defs  r   r:   r   r   rn   ro   r   qaccumr   r   r   r    r!   r   r   c                    d}d }t        | ||||
|	||||||||g|      \  }}}}}||d   |d   c|d<   |d<   n|d   |d   c|d<   |d<   |||||||||t        |      |g
z   }|dk(  sJ d       t        j                  j	                  |j                                t        t        |j                               ||      }|j                  |j                     S )	NFr(   r	   r   sumzCFor now, only post op sum is supported in QConvPointWiseBinaryPT2E.r~   r   )rt   r   r   rT   mark_buffer_mutatedr   r   r   rx   rm   r   )rd   r   rn   ro   r   rp   rq   r   r   r   r   r    r!   r   r   r   accum_scaleaccum_zero_pointr   alphar   r   r   r"   r#   rm   rs   _kernel_layoutrj   r   s                                 r8   r   zQConvPointWiseBinaryPT2E.create  sA   4 
 /lG\:
	
" <1>q1A=QRCS.M!mA.1>q1A=QRCS.M!mA.%#M2)
 
 e# 	
Q	
# 	
##FOO$56)V%6%6%89'
 }}V7788r:   r   r   )r   r   r   r   r   r   r   r   r   rV   r   r   r   r,   r.   r   r   r   s   @r8   r   r     s    
 	

 

8/BHSM B*U\\*B  O9O9 O9 "	O9
 O9 O9 O9 S	O9 cO9 s)O9 O9 "O9 'O9 O9r:   r   c                   @     e Zd Z	 d	 d fdZ fdZed        Z xZS )MKLPackedLinearc                     t         |   |||d t        j                  j                  j
                  j                         y N)r   )r   r   r   r   mkl_mkl_linearr   r   s       r8   r   zMKLPackedLinear.__init__"  s:     			1199 	 	
r:   c                 F    |j                  d       t        | 	  |       y r   r   r   s     r8   r   zMKLPackedLinear.codegen0  r   r:   c                    | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}t        |      |gz   }	t	        j
                  |	      }
|||g}|g}|||gz  }n|j                  dd        t        t        |j                         |j                         |	|
      ||      S )Nr   r   )require_stride1realize_inputrw   r,   r   r_   rc   r  r   rx   rb   )rd   r   packed_worig_wB
batch_sizery   rz   r{   r/   rl   rm   rs   s                r8   r   zMKLPackedLinear.create4  s     1 1! 45$$S%6%6v%>?

A!A1gn&99+FXv&#=qcMF  D){M '
 	
r:   r   r   r   r   r   r   r   r   r   r   r   s   @r8   r  r  !  s0    
 	

 

! 
 
r:   r  c                   F     e Zd Z	 d	 d fdZ fdZed        Zd Z xZS )LinearUnaryc                     t         |   |||d t        j                  j                  j
                  j                  d       y )N aoti_torch_cpu__linear_pointwiser   )r   r   r   r   r   _linear_pointwiser   r   s       r8   r   zLinearUnary.__init__M  s?     			((::BB> 	 	
r:   c                 F    |j                  d       t        | 	  |       y r   r   r   s     r8   r   zLinearUnary.codegen\  r   r:   c                    | j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }	}t        |      |	gz   }
||g}||r|ndg|g}|2| j                  | j                  |            }|j	                  |       n|j                  dd        t        t        |j                         |j                         |
      ||      }t        |      S )Nr   r   r   rG   r   )require_contiguousr  rw   r,   r*   rc   r  r   rx   rb   r   )rd   r   wr  r   r   r   ry   _icr{   r/   rm   rs   r   s                 r8   r   zLinearUnary.create`  s    ""3#4#4Q#78""3#4#4Q#78**,C**,C1gnQ'wtYG=&&s'8'8';<AMM!  D)||~kkm 
 '
 #6**r:   c                      y rB   rC   r   s    r8   apply_constraintzLinearUnary.apply_constraint{      r:   r   r   )	r   r   r   r   r   r   r   r"  r   r   s   @r8   r  r  L  s5    
 	

 

! + +4r:   r  c                   J     e Zd ZdZ	 d	 d fdZ fdZed        Zd Z xZ	S )LinearBinaryz)torch.ops.mkldnn._linear_pointwise.binaryc                     t         |   |||d t        j                  j                  j
                  j                  d       y )N'aoti_torch_cpu__linear_pointwise_binaryr   )r   r   r   r   r   r  r   r   s       r8   r   zLinearBinary.__init__  s?     			((::AAE 	 	
r:   c                 F    |j                  d       t        | 	  |       y r   r   r   s     r8   r   zLinearBinary.codegen  r   r:   c                 J   | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                         ^ }}|j                         \  }}t        |      |gz   }	|||g}
|g}|2| j                  | j                  |            }|
j	                  |       n|j                  d|       t        t        |j                         |j                         |	      |
|      }t        |      S )Nr   r  r   )r  r  rw   r,   r*   rc   r%  r   rx   rb   r   )rd   r   yr  r  r   ry   r   r{   r/   rm   rs   r   s                r8   r   zLinearBinary.create  s
   ""3#4#4Q#78""3#4#4Q#78""3#4#4Q#78**,C**,C1gnQ=&&s'8'8';<AMM!  A&||~kkm 
 '
 #6**r:   c                      y rB   rC   r   s    r8   r"  zLinearBinary.apply_constraint  r#  r:   r   r   )
r   r   r   r6   r   r   r   r   r"  r   r   s   @r8   r%  r%    s:    8F 	

 

! + +6r:   r%  c                   h     e Zd Z	 	 d	 d fdZ fdZeddddddddddd	dd
ddedefd       Z xZ	S )QLinearPointwisePT2Ec                     || _         t        | 	  |||dt        j                  j
                  j                  j                  d       y)a  
        if bias is not None
            - inputs = [x, w, b, weight_scale, weight_zp]
            - const_args is: [x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, weight_scale, weight_zp]
            - const_args is: [bias, x_scale, x_zp, o_scale, o_zp,
              fp32_output, unary_attr, unary_scalars, unary_algorithm]
        N(aoti_torch_cpu__qlinear_pointwise_tensorr   )r   r   r   r   r   r   qlinear_pointwisetensorr   r   rm   rs   r   r   s        r8   r   zQLinearPointwisePT2E.__init__  sF    " !));;BBG 	 	
r:   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r   r   r   s     r8   r   zQLinearPointwisePT2E.codegen  ?    $$%RS dkk6*%%g. +r:   r   r   rn   ro   r   rp   rq   r   r   r   c           
          t        | |||||||g      \  }}}}}|||	|
|t        |      |gz   }|
J |
t        j                  t        j                  fv r|
|_        t        ||||d u      S )Nr   rm   rs   r   )r|   r   r   r   r   r   r-  )rd   r   rn   ro   r   rp   rq   r   r   r   r   post_op_namepost_op_argspost_op_algorithmrm   rs   rr   rz   s                     r8   r   zQLinearPointwisePT2E.create  s    " 8UlG\:8
4q! &#L1)
 
 '''EMM5>>:: #/M# '$&	
 	
r:   rC   Tr   )
r   r   r   r   r   r   r   r.   r   r   r   s   @r8   r-  r-    s    
 
 

6/ ,
,
 ,
 "	,

 ,
 ,
 ",
 ,
 ,
 ,
 ,
r:   r-  c                   ~     e Zd Z	 	 d	 d fdZ fdZdee   fdZeddddddd	dd
dddddddde	de
fd       Z xZS )QLinearPointwiseBinaryPT2Er   c                     || _         d| _        t        |   |||dt        j
                  j                  j                  j                  d       y)a  
        if bias is not None
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2, bias]
            - const_args is: [o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        else
            - inputs = [x, w, x_scale, x_zp, weight_scale, weight_zp, x2]
            - const_args is: [bias, o_scale, o_zp,
              fp32_output, binary_attr, alpha, unary_attr, unary_scalars, unary_algorithm]
        r   N/aoti_torch_cpu__qlinear_pointwise_binary_tensorr   )	r   r   r   r   r   r   r   r0  binary_tensorr2  s        r8   r   z#QLinearPointwiseBinaryPT2E.__init__	  sN    " !#$ ));;IIM 	 	
r:   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r   r   r   s     r8   r   z"QLinearPointwiseBinaryPT2E.codegen%  r   r:   c                 ~    | j                   d   }|dk(  r(| j                  | j                     j                         gS g S )Nr   )rs   rm   r   r   )r   binary_post_ops     r8   r   z-QLinearPointwiseBinaryPT2E.get_mutation_names+  s@    ++B/U"KK 8 89BBDEEIr:   r   r   rn   ro   r   rp   rq   r%   r   r   r   c                    t        | |||||||g||dk(        \  }}}}}||	|
||||||t        |      |g
z   }|dk(  rot        j                  j	                  |j                                t        t        |j                               |||d u      }|j                  |j                     S |J |t        j                  t        j                  fv r||_        t        ||||d u      S )Nr   r~   r6  )r|   r   r   rT   r  r   r<  r   rx   rm   r   r   r   r   r   )rd   r   rn   ro   r   rp   rq   r%   r   r   r   r   other_scaleother_zprC  r   unary_post_opunary_post_op_argsunary_post_op_algorithmrm   rs   rr   rj   r   s                           r8   r   z!QLinearPointwiseBinaryPT2E.create2  s$   8 *lG\:e#
	
 &#$67#)
 
 U"GG''(89/!)9)9);<+d*	F ==!;!;<<'''EMM5>>:: #/M) '$&	
 	
r:   r:  r   )r   r   r   r   r   r   r   r   r   r   r.   r   r   r   s   @r8   r<  r<    s    
 
 

8/HSM  H
H
 H
 "	H

 H
 H
 "H
 H
 H
 H
 H
 H
r:   r<  c            !            e Zd Z	 d	 d fdZeddddddddddddd	dd
edee   dededededededef d       Z fdZ	 xZ
S )MkldnnRnnLayerc                     t         |   |||d t        j                  j                  j
                  j                         y r	  )r   r   r   r   atenmkldnn_rnn_layerr   r   s       r8   r   zMkldnnRnnLayer.__init__  s:     			77?? 	 	
r:   r   r   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                    | j                  | j                  |            }|j                          | j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }| j                  | j                  |            }|j                          | j                  | j                  |            }|j                          |j                         }t	        |      dk(  sJ d       |\  }}}|||g}|j                         }|j                         }|||||||g}||	|
||||||g	}t        t        |j                               ||      }d }|||dgg} |||      t        j                  |      t        j                  |      dgg}t        t        ||            D cg c]D  \  }\  }}t        t        |j                         |j                         ||      |t        |fg      F } }}}| |_        | S c c}}}w )NrM   zExpect lstm input to be 3Dr~   )rm   rs   c                 V    t        |       dk(  sJ d       t        j                  |       S )NrM   zExpect output_shape to be 3D)r)   r   r_   )output_shaper\  s     r8   get_strides_of_lstm_outputz9MkldnnRnnLayer.create.<locals>.get_strides_of_lstm_output  s,    |$)I+II)!44\BBr:   r	   )r  r  freeze_layoutrw   r)   rK  r   rx   r   r_   	enumeratezipr   r   rb   tupler   )!rd   r   rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r4   
seq_length
mini_batchr`  hy_shapecy_shaperm   rs   r   ra  output_sizesoutput_stridesrQ   r/   rl   r   s!                                    r8   r   zMkldnnRnnLayer.create  s]   (  1 1! 45 	
  !2!22!67  !2!22!67  !2!22!67  !2!22!67  !2!22!67
  !2!22!67
ZZ\
:!#A%AA# .8*
J
"J<;;=;;=RRR,

  Q\\^4'
	C %h1#>&|[A--h7--h7C	
" 4=L.14
 
 0/K LLNKKM!	 	
	 
 ##
s   2A	Ic                 D    |j                  d       t        | 	  |      S r   r   r   s     r8   r   zMkldnnRnnLayer.codegen  s!    $$%RSww''r:   r   r   )r   r   r   r   r   boolr,   r.   r   r   r   r   s   @r8   rK  rK  ~  s    
 	

 

 [[ [ 	[
 [ [ [ [ [ #Y[ [ [ [ [ [  ![" #[ [z( (r:   rK  c                   R     e Zd Z	 d	 d fdZ fdZe	 	 	 	 	 	 	 	 dd       Z xZS )WeightInt4PackMatmulc                     t        |      dk(  sJ t        |      dk(  sJ t        | 	  |||dt        j                  j
                  j                  j                  d       y)zY
        inputs = [x, w, qGroupSize, qScalesAndZeros]
        constant_args = ()
        rN   r   N-aoti_torch_cpu__weight_int4pack_mm_cpu_tensorr   )r)   r   r   r   r   	quantizedint4mm_packed_weight_cpur   r   s       r8   r   zWeightInt4PackMatmul.__init__  sd     6{a=!Q&&&,,EEMML 	 	
r:   c                     |j                  d       t        | 	  |       t        | j                  t
              r| j                  |       y y r   r   r   s     r8   r   zWeightInt4PackMatmul.codegen  r4  r:   c                    ||||g}|j                         ^ }}|j                         \  }}t        |      |gz   }	t        j                  |	      }
t	        |j                         |j                         |	|
      }t        ||      S )N)r   rm   )rw   r,   r   r_   r   rx   rb   ro  )rd   r   r  
qGroupSizeqScalesAndZerosrm   ry   rz   nr/   rl   rr   s               r8   r   zWeightInt4PackMatmul.create  s     Q
O4

Azz|11gm&99+F#LLNKKM	
 $ 
 	
r:   r   r   )r   r   r  r   rv  r   rw  r   r  r   s   @r8   ro  ro    sV    
 	

 

*/ 

 
  	

 %
 
r:   ro  )FNNN)NNF)2collections.abcr   typingr   r   rV   r   torch._prims_commonr   torch.utils._ordered_setr   irr
   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   virtualizedr   r.   rm  r,   rt   r|   r   r   r   r   r   r   r   r  r  r%  r-  r<  rK  ro  rC   r:   r8   <module>r     s   $     > /    U T  .215#'AIAI AI 	AI
 c]AI SMAI smAI AI AI Xc]+AI D-.AI K AIR 26#'<I<I <I 	<I
 D-.<I K <I <I~8+( 8+vA+) A+HM 0 M `@+ 1 @+Fd
* d
Ny90 y9x(
' (
V0# 0f3$ 3lP
, P
fs
!2 s
lo(& o(f3
, 3
r:   