
    rh                        d dl Z d dlmZ d dlZd dlmc mZ d dlm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZmZmZ ddlmZmZmZ ddlm Z m!Z! ddl"m#Z#m$Z$m%Z% dejL                  dej&                  dej&                  dej&                  dej&                  de'e(ej&                  eej&                     f   fdZ)de(de$de$dee$   dee$   dee$   dee$   de$fdZ*	 	 	 	 d#dede+e   de+e   fd Z,d!e,_-        d" Z.y)$    N)Optional)mm_args   )configir)CppGemmTemplate)CppGroupedGemmTemplatecreate_epilogue_with_attr)	TensorBox)addadd_needs_realized_inputsatenpermuteregister_loweringto_dtypeview)autotune_select_algorithmChoiceCallerExternKernelChoice)use_aten_gemm_kernelsuse_cpp_gemm_template)opsOpsValueVW_tensorpacked_weightx_scalex_zpw_scalereturnc                 Z   d}d }d }t        d |||fD              r%d}t        j                  j                  |j	                            t        j                  j                  |j	                            z  }t        j                  j                  ||j	                         dz         }t        j                  | j                  t        j                        d      }	t        j                  j                  |j	                            }
|	|z  |
z  }	t        j                  j                  |	|j	                         dz         }nft        j                  | j                  t        j                        d      }	t        j                  j                  |	|j	                         dz         }|||fS )	NFc              3   8  K   | ]  }t        |t        j                        xrr |j                         t        j
                  j                  v xrF t        |j                  d       xr. t        |j                  j                  t        j                          yw)dataN)

isinstancer   r   get_namer   graph	constantshasattrr$   ConstantBuffer).0items     s/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/mkldnn_lowerings.py	<genexpr>z+create_int8_compensation.<locals>.<genexpr>*   sy      
 	 	4& 	:MMOqww000	:DIIv&	: tyy~~r'8'89	:s   BBT_x_w_compensnamer   )dim_BMatrixCompens)
allr   r'   r(   r&   add_tensor_constanttorchsumtofloat)r   r   r   r   r    use_int8_fast_compensation_pathweight_compens	x_w_scalex_w_scale_tensorweight_compens_tensorx_zp_tensors              r-   create_int8_compensationr@       s    ',#NI
 
 dG,  +/'GGg..01gg 0 0 234 	 GG//'')N: 0 
	 !&		(++ekk*B Jgg''8 58H H; V44!''),== 5 

 !&		(++ekk*B J44!''),== 5 

 	(     r:   input_weight_compo_x_scale_x_zp_w_scale
_x_w_scalec                 Z   | r,t        j                  t        j                  ||      |      }|S t        j                  t        j                  ||      |      }t        j                  |t        j                  t        j                  t        j                  ||      |      |            }|S N)r   submul)r:   rB   rC   rD   rE   rF   rG   temps           r-   'codegen_int8_gemm_template_compensationrM   N   s     'wwGG 
H K9 wwGG 
 wwGGGG    	
 KrA   xwbc           
         | j                         }t        |      dkD  rt        | d|d   g      } t        |      }t        j                  st        j
                  sJ |D 	cg c]%  }	|	|	nt        j                  j                  |	      ' }}	g }
t        | t        |d   ddg      |      ^ }}} }t        |D 	cg c]  }	|	d u c}	dd t        j                  t        |      |             }| g|}|j                  |D 	cg c]  }	|	|		 c}	       t        j                   |
||fi | t        |
      dk7  sJ t#        d|
||      }|j$                  j$                  }t        |      D cg c]   }t        j&                  ||t(        |fg      " }}t        j*                  |d   j-                         	      |_        ||_        t        |      D cg c]$  }t        j2                  j5                  ||         & }}t        |      dkD  r>t        |      D ]0  }t        ||   g |d d ||   j                         d         ||<   2 |S c c}	w c c}	w c c}	w c c}w c c}w )
N   r   r   layoutT)has_biastrans_wepilogue_creatoract_mappinggrouped_gemm)device)get_sizelenr   r   max_autotunemax_autotune_gemmr   ExternKernelrealize_inputr   r   dictfromkeysrangeextendr	   add_choicesr   r$   MultiOutputlistMultiOutputLayout
get_devicerU   outputsr   create)rN   rO   rP   attrscalars	algorithmrU   x_sizenum_gemmbiaschoices_kwargsinput_nodesresulttemplate_bufgemm_idxreturn_bufsreturn_tensorss                      r-   grouped_gemm_loweringr|      sv    ZZ\F
6{QR$%1vH&":":::STU42??#@#@#F	FUAU"$Gq'!A$A"7GQ1/01t$d"1MM%/15	F 'q'K?d.>?@&& 	 w<1&	F ;;##L h 	v|tX.>-?@K  ..k!n6O6O6QRL&LCH?7?K12N  6{Qh 	H'+x(G&"+G~h7@@B2FG(N8$	
 [ 	V 2 @"s$   !*I:II	I-%I)I!Tc            !         t         j                  j                  rYddlm t        t         j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  ddj                  j                        t        t         j                  j                  j                  j                  ddj                   j                        t         j                  j                  j"                  t         j                  j                  j$                  t         j                  j                  j&                  t         j                  j                  j                  t(        j*                  j,                  t         j                  j                  j.                  g} t1        t         j                  j                  j"                        dt2        dt2        d	t2        ffd
       }t1        t         j                  j                  j"                  j                        dt2        dt2        dt2        d	t2        ffd       }t1        t         j                  j                  j$                  j                        dt2        dt2        dt2        d	t2        ffd       }t1        t         j                  j                  j                        	 d3dt2        dt2        dt2        ffd       }t1        t         j                  j                  j                  j                        	 d3dt2        dt2        dt2        dt2        ffd       }t1        t         j                  j                  j&                        dt2        dt2        d	t2        ffd       }t1        t(        j*                  j,                        dt2        dt2        dt2        dt2        dt2        dt2        dt2        dt4        dt6        t8           dt8        dt8        dt8        dt4        d t4        d!t4        d"t4        f fd#       }t1        t         j                  j                  j.                  d $      dt2        d%t2        d&t2        d't2        d	t2        f
fd(       }t1        t         j                  j                  j:                  j                  d $      t1        t         j                  j                  j:                  j<                  d $      dt2        d%t2        d&t2        d't2        d)t2        d	t2        ffd*              }	t1        t         j                  j                  j                  d $      	 d3dt2        d%t2        d&t2        d't2        d	t2        f
fd+       }
t1        t         j                  j                  j                  j                  d $      t1        t         j                  j                  j                  j<                  d $      	 d3dt2        d%t2        d&t2        d't2        d,t2        d	t2        ffd-              }t         j                  j>                  rt        t         j                  j@                  jB                  d.djD                  j                        | jG                  t         j                  j@                  jB                         t1        t         j                  j@                  jB                        d d/dt2        d0t2        d1t2        dtH        t2           ffd2       }tK        |        y y )4Nr   )	mkldnn_irzmkldnn::_linear_pointwiseF)has_out_variantkernel_creatorzonednn::qlinear_pointwiserN   weightrr   c
                 r    t        j                  
j                  j                  | |||||||||	
            S rI   )r   rl   ConvolutionUnary)rN   r   rr   paddingstridedilationgroupsrm   rn   ro   r~   s             r-   convolution_unaryz5register_onednn_fusion_ops.<locals>.convolution_unary   sJ     ##**11 rA   otherc                 x    t        j                  j                  j                  | |||||||||	|
||            S rI   )r   rl   ConvolutionBinaryrN   r   r   rr   r   r   r   r   binary_attrbinary_alpha
unary_attrunary_scalarsunary_algorithmr~   s                r-   convolution_binaryz6register_onednn_fusion_ops.<locals>.convolution_binary  sS      ##++22 !# rA   c                 x    t        j                  j                  j                  | |||||||||	|
||            S rI   )r   rl   ConvolutionBinaryInplacer   s                r-   convolution_binary_inplacez>register_onednn_fusion_ops.<locals>.convolution_binary_inplace%  sS      ##2299 !# rA   rO   rP   c                    | j                         }t        |      dkD  rt        | d|d   g      } |t        j                  j                  |      }g }t        j                  st        j                  rvt        |ddg      }	t        | |	|      ^ }
}} }	t        || |	      rHfd}t        |d uddk(  rd n|	      }|g d
|d<   t        j                  |||| |gn| ||gfi | t        |      dk(  s
t               rAt              }|d |d<   |j!                   j"                  || |gn| ||g|fi |       |j%                         t&        j(                  j*                  v sJ dd i}t-        d||| |gn| ||g||      }t        |      dkD  r%t        |g |d d |j                         d         }|S )NrR   rS   r   r   rT   c                 "    t        |       S )Nrn   ro   r
   )bufro   rm   rn   s    r-   rX   zJregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.epilogue_creator]  s    8w)  rA   TnonerV   rW   rX   )rR   r   r   input_indices)rm   rn   ro   Bc                 X    t         j                  j                  | j                            S rI   r   r'   r(   r&   rN   s    r-   <lambda>zBregister_onednn_fusion_ops.<locals>.linear_unary.<locals>.<lambda>|      QWW..qzz|< rA   linear_unaryinput_gen_fnsr\   r]   r   r   r`   ra   r   r^   r_   r   r   r   rb   r   rf   r   appendbindr&   r   r'   r(   r   )rN   rO   rP   rm   rn   ro   rU   rp   rs   transposed_wrt   rX   ru   r   rw   aten_mkldnn_linear_unarys      ```         r-   r   z0register_onednn_fusion_ops.<locals>.linear_unaryG  s    ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&1.5af.U+FA|(LA
 "!"$ $15EUF
 }2;/#//"#)A!Q !	 7|q $9$;4IN9"&F3K1,11"#)A!Q ! ::<177#4#4444<M /)A!Q+F 6{Qf&Ks&KV__5Fr5J&KLMrA   yc                    | j                         }t        |      dkD  rt        | d|d   g      } j                         }t        |      dkD  rt        d|d   g      |t        j                  j                  |      }g }t        j                  st        j                  rvt        |ddg      }	t        | |	|      ^ }
}} }	t        || |	      rFfd}t        |d ud|      }|g d	ng d
|d<   t        j                  |||| |gn| ||gfi | t        |      dk(  s
t               rAt              }|d |d<   |j!                   j"                  || |gn| ||g|fi |       |j%                         t&        j(                  j*                  v sJ dd i}t-        d||| |gn| ||g||      }t        |      dkD  r%t        |g |d d |j                         d         }|S )NrR   rS   r   r   rT   c                      t        |       S )N)r   r
   )r   rm   r   s    r-   rX   zKregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.epilogue_creator  s    8d!LLrA   Tr   )r   rR   r   )   r   rR   r   r   )rm   r   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.linear_binary.<locals>.<lambda>  r   rA   linear_binaryr   r   )rN   r   rO   rP   rm   rU   rp   y_sizers   r   rt   rX   ru   r   rw   aten_mkldnn_linear_binarys    `  `          r-   r   z1register_onednn_fusion_ops.<locals>.linear_binary  s/    ZZ\F6{QR,-ZZ\F6{QR,-}OO11!4*,G""f&>&>&q1a&118|Qv2.FA|Q )LAM "!"$ $)9F
 <=9i,F?+#//%&YAq	Q1aL !	 7|q $9$;49"&F3K2-22%&YAq	Q1aL ! ::<177#4#4444<M /YAq	Q1aL+F 6{Qf&Ks&KV__5Fr5J&KLMrA   c                 t    t        j                  j                  j                  | |||||||||	|
            S rI   )r   rl   ConvolutionTransposeUnary)rN   r   rr   r   output_paddingr   r   r   rm   rn   ro   r~   s              r-   convolution_transpose_unaryz?register_onednn_fusion_ops.<locals>.convolution_transpose_unary  sM     ##33::" rA   w0w1w2w3hxcxreversebatch_sizesmodehidden_size
num_layers
has_biasesbidirectionalbatch_firsttrainc                     t        j                  t        j                  j                  j                  | |||||||||	|
|||||            S rI   )pytreetree_mapr   rl   MkldnnRnnLayer)rN   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r~   s                   r-   mkldnn_rnn_layerz4register_onednn_fusion_ops.<locals>.mkldnn_rnn_layer  sc    & ??  ((//!! rA   )type_promotion_kindr   r    w_zpc                    t        |      t        k(  sJ t        j                  j	                  t        j                  |t
        j                        d      }t        |      t        k(  sJ t        j                  j	                  t        j                  |t
        j                        d      }t        j                  j                  j                  | |||||||||	|
||||||            S )Ndtyper   r0   r   )typer9   r   r'   r5   r6   tensorfloat32intint32r   rl   QConvPointWisePT2E)rN   r   r   r   r    r   rr   r   r   r   r   o_inv_scaleo_zero_pointoutput_dtyperm   rn   ro   r~   s                    r-   qconvolution_unaryz6register_onednn_fusion_ops.<locals>.qconvolution_unary  s    * =E)))gg11WEMM: 2 G :$$$77..T5F / D ##,,33!  # rA   accumc                    t        |      t        k(  sJ t        j                  j	                  t        j                  |t
        j                        d      }t        |      t        k(  sJ t        j                  j	                  t        j                  |t
        j                        d      }|dk(  rq|t
        j                  t
        j                  fv rO|j                         t
        j                  t
        j                  fv r|j                         |k7  rt        ||      }t        j                  j                  j                  | |||||||||	|
|||||||||||            S )Nr   r   r0   r   r7   )r   r9   r   r'   r5   r6   r   r   r   r   bfloat16	get_dtyper   r   rl   QConvPointWiseBinaryPT2E)rN   r   r   r   r    r   r   rr   r   r   r   r   r   r   r   accum_scaleaccum_zpr   alphar   r   unary_algorithmmr~   s                         r-   qconvolution_binaryz7register_onednn_fusion_ops.<locals>.qconvolution_binaryB  sF   > =E)))gg11WEMM: 2 G :$$$77..T5F / D
 u$ U]]ENN$CCOO%%--)HHOO%5 !5##2299!  !$- rA   c                 \  	
 |j                         t        j                  u sJ d       | j                         }t	        |      dkD  rt        | d|d   g      } t        t        j                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                        d      n^j!                          t#        d j                         D              rt        g       t	        j                               dv sJ d	       Dt        j                  j                  t        j                  d
t        j$                        d      t        t        j                        sYt              t&        k(  sJ t        j                  j                  t        j                  t        j$                        d      nj!                          j)                         dk(  sJ d       |Dt        j                  j                  t        j                  d
t        j$                        d      }j!                          |j!                          |j                         t        j$                  k7  rt        t        j*                  j-                  |      t        j.                        rt        j                  j0                  |j3                            j5                  t        j$                        }t        j                  j                  t        j                  |t        j$                        |j3                               }d nj                         g }t6        j8                  st6        j:                  rt=        | ||	      ^ }}} }t        t        j*                  j-                  |      t        j.                        rNt        j>                  t        j@                  t        j                  j0                  |j3                                  t        j                  j0                  |j3                                  rtC        || |      rt        j                  j0                  |j3                            jE                         }tG        ||      \  
	fd}| j                         t        jH                  t        j                  fv sJ tK        jL                  ||| ||gn| ||gd u|g dng d       t	        |      d
k(  s
tO               rLtQ        	
      }d |d<   |jS                   jT                  | ||fn| ||f|fi |       |j3                         t        j                  j0                  v sJ d d d d d}t        t        j*                  j-                        t        j.                        rd |d<   t        t        j*                  j-                        t        j.                        rd |d<   tW        d|| ||gn| ||g||      }t	        |      dkD  r%t        |g |d d |j                         d         }|S )Nz2Only int8 weights are supported by oneDNN qlinear.rR   rS   r   r   r0   c              3   &   K   | ]	  }|d k(    ywr   N r+   r2   s     r-   r.   zDregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<genexpr>       >Csax>   r   r   x_scale must be 0D or 1Dr   r   r   z(x_zp is incompatible with oneDNN qlinearr   rU   	out_dtypec                 Z  	
 t         j                  t         j                  t         j                  t         j                  fv sJ | j                         j                         d rJ j                         j                         j                         
j                         d j                         
f
d}t        j                  | j                         t         j                  || j                               }dk7  rt        |      }t         j                  k(  rM|j                         fd}t        j                  |j                         ||j                               }|S t         j                  t         j                  fv rzddlm |j                         		fd}t        j                  |j                         t        j                  |t!              t#              	      |j                               }|S )
Nc           	        
  |       }t        j                  |t        j                        }| d   f}d }d }d }s d      } d      } |      } |      }d }rJ  |      }t	        ||||||      }
y |      }	t        j                  t        j
                  fv sJ t        j
                  k(  r$t        j                  |	t        j                        }	t        j                  ||	      }|S NrS   r   r   r   r6   r   rM   r   r   )indexrB   weight_compens_indexrD   rE   rF   rC   rG   rL   _biasrr   
bias_dtypebias_loaderinput_loaderr:   w_scale_loaderweight_compens_loaderx_scale_loaderx_w_scale_loaderx_zp_loaders             r-   inner_fnz]register_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn  s   $0$7E %(LL$FE49"I<0'+H$(E'+H#B+9"+=(3B+9:N+O,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwtU';#'KrA   r[   r   r   rangesr   r   c                 @     |       }t        j                  |      S rI   r   r   r   rB   output_cast_loaderr   s     r-   inner_fn_cast_output_to_bf16zqregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16O      (:5(A'*||E<'H HrA   r   _create_constantsc                     |       } 	d|z  |t         j                        \  }}t        j                  ||z        |z   }
t         j                  k(  r 	ddt         j                        \  }}n 	ddt         j                        \  }}t        j
                  t        j                  ||      |      }t        j                  |
      S Ng      ?r   r      i   r6   r   r   rounduint8minimummaximumr   r   scale
zero_pointrB   	inv_scalevalqminqmaxclampedr  r   requant_input_loaders            r-   inner_fn_requantzeregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator.<locals>.inner_fn_requant^  s    (<U(C8I$'%K5==9" 5	: '*ii	0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD$ +.++ckk#t6Ld*S'*||G\'J JrA   r  r  r6   r   r   r  int8make_loaderr   	Pointwiserj   r\   r   get_device_or_errorloweringr  	functoolspartialr9   r   )input_bufferr   
output_bufr  r  r  r   r   r  r  r   r   r   r   r   ro   rm   rr   r   o_scaler   r   rn   r:   r    r;   r   r<   r   s        @@@@@@@@@@r-   rX   zKregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.epilogue_creator  s   +!MM!NN!KK!JJ	0      (4'?'?'A0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K'( '(R &(\\#/#:#:#<"'--%-#/#8#8#:	&
  6>)B *D'Y*J
 (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0K" *,'1'E'E'G&2)2):):$4*/./2</@*"
 (2':':'<	*J  *)rA   )r   r   r   rR         )   r   r   r   rR   r(  r)  rV   rX   r   )output_scaleoutput_zero_pointr   post_op_namepost_op_argspost_op_algorithmrr   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r   rA   )r   r(  r)  r*  c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>      QWW->->qzz|-L rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zCregister_onednn_fusion_ops.<locals>.qlinear_unary.<locals>.<lambda>  r6  rA   qlinear_unaryr   ),r   r6   r  r\   r]   r   r%   r   r   r   r9   r   r'   r5   r   r   realizer4   r   r   	get_numelInputsKernelunwrap_storage_for_inputr*   r(   r&   r8   r   r^   r_   r   equal
zeros_liker   to_denser@   r  r   rf   r   rb   r   r   r   )rN   r   r   r   r    r   rr   r'  r   r   rm   rn   ro   rU   rp   w_zp_tensorrs   rt   r   rX   ru   r   rw   r   r:   r;   r<   aten_mkldnn_qlinear_unarys    `` ` ```````          @@@@r-   r8  z1register_onednn_fusion_ops.<locals>.qlinear_unary  s   " !**,

: D: ZZ\F6{QR,-gr||4G}---''55LL>Y 6  !>7+;+;+=>> #7B/G7++-.&8T:TT8|
 ww22LL%++6V 3  dBLL1DzS(((ww22LLU[[9 3  >>#q(T*TT(
 | ww22LL%++6V 3  OOLLN~~5;;.:88>!!4
  gg//@CCEKKPww22LLEKK@t}} 3  "&4>>3CJ*,G""f&>&>/6}V|0,FA} @@F)) (():):4==?)KL))$--/: ,FA}E ww001G1G1IJSSUH 1 %	7&!{* {* {*z ;;=U[[%**,EEEE#//< GT='4H$wdS!%T!1)9< '92 7|q $9$;!(&2!-!%!(&/ <%)F6N2-22< GT='4H$wdS	
 ! !))+qww/@/@@@@<<<<	M 88A!!
 $Ma 88>!! $Ma .< GT='4@$wdK+F 6{Qf&Ks&KV__5Fr5J&KLMrA   x2c                 F  	
 ! | j                         }j                         }t        |      t        |      k(  sJ t        |      dkD  r'|dk(  r"t        | d|d   g      } t        d|d   g      t        t        j
                        sYt              t        k(  sJ t        j                  j                  t        j                  t        j                        d      n^j                          t        d j                         D              rt        g       t        j                               dv sJ d	       Dt        j                  j                  t        j                  d
t        j                         d      |Dt        j                  j                  t        j                  d
t        j                         d      }t        t        j
                        sYt              t"        k(  sJ t        j                  j                  t        j                  t        j                         d      nj                          j                          |j                          |j%                         t        j                   k7  rt        t        j&                  j)                  |      t        j*                        rt        j                  j,                  |j/                            j1                  t        j                         }t        j                  j                  t        j                  |t        j                         |j/                               }|dk(  r
t        j                  t        j2                  fv rPj%                         t        j                  t        j2                  fv r j%                         
k7  r't5        
      nj%                         
k(  sJ d       j%                          j%                         nd g }t6        j8                  st6        j:                  r|dk(  rt=        | ||
      ^ }}} }t        t        j&                  j)                        t        j*                        rt        j?                         j@                        d
k(  rZt        t        j&                  j)                  |      t        j*                        r"t        jB                  t        jD                  t        j                  j,                  |j/                                  t        j                  j,                  |j/                                  rtG        || |      rt        j                  j,                  |j/                            }|jI                         }tK        ||      \  !	
 !fd}tM        jN                  ||	| ||gn	| ||gd u|g dng d       t        |      d
k(  s
tQ               rRtS        	
||||
      }d |d<   |jU                   "jV                  	| ||fn	| ||f|fi |       |j/                         t        j                  j,                  v sJ d d d d}d |d<   tY        d|	| ||gn	| ||g||      }t        |      dkD  r*|dk(  r%t        |g |d d |j                         d         }|S )NrR   r   rS   r   r   r0   c              3   &   K   | ]	  }|d k(    ywr   r   r   s     r-   r.   zEregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<genexpr>  r   r   r   r   r   r   r   r7   zCdtype of accum for qlinear post op sum should be the same as outputr   c                   	
 t         j                  t         j                  t         j                  t         j                  fv sJ | j                         j                         j                         d rJ j                         j                         j                         
j                         d j                         
fd}t        j                  | j                         t         j                  || j                               }dk7  rt        |      }t         j                  k(  rM|j                         fd}t        j                  |j                         ||j                               }|S t         j                  t         j                  fv rddlm |j                         		fd}t        j                  |j                         t         j                  t        j                  |t!              t#              	      |j                               }|S )
Nc           	          |       } |       }d }d }d }| d   f}s d      } d      } |      }t        j                  |t        j                        } |      }d }rJ  |      }t	        ||||||      }	y |      }
t        j                  t        j
                  fv sJ t        j
                  k(  r$t        j                  |
t        j                        }
t        j                  |	|
      }	t        j                  t        j
                  fv sJ t        j
                  k(  r$t        j                  |t        j                        }t        j                  |	|      }	|	S r   r   )r   rB   _x2rD   rE   rF   r   rC   rG   rL   r   rr   r   r   r   r:   r   r   x2_dtype	x2_loaderr   r   r   s              r-   r   z^register_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fnZ  s`   $0$7E"+E"2C'+H$(E'+H49"I<0#B+9"+=(3B+9:N+O$'LL$FE,ABV,WM)-J>'7'C C'C-=>R-S
#J ? % - ( % ( *$D  $/(34H(I'1emmU^^5T'T T'T#-#?,/LL,NE'*wwtU'; $,u~~/N#NN#N'5>>9&)ll3&F#&774#5D#'KrA   r   r   r   c                 @     |       }t        j                  |      S rI   r  r  s     r-   r  zrregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_cast_output_to_bf16  r  rA   r   r  c                     |       } 	d|z  |t         j                        \  }}t        j                  ||z        |z   }
t         j                  k(  r 	ddt         j                        \  }}n 	ddt         j                        \  }}t        j
                  t        j                  ||      |      }t        j                  |t         j                        S r
  r  r  s            r-   r  zfregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creator.<locals>.inner_fn_requant  s    (<U(C8I$'%K5==9" 5	: '*ii	0A&BZ&O#/5;;#>1B()3emm2&JD$ 2C(,c2&JD$ +.++ckk#t6Ld*S'*||GU[['I IrA   r  r  ) r%  r   r&  r  r  r  r   r   r  r  r   r   rI  r   r   r   rr   r   r'  r   r   r   r   r   r:   r    r;   rB  rH  r   r<   r   s         @@@@@@@@@@@r-   rX   zLregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.epilogue_creatorB  s	   +!MM!NN!KK!JJ	0      (4'?'?'A$&NN$4	0>0J0J0L-+/(:#,#88#8/8/D/D/F,)0)<)<)>)0)<)<)>&*&6&6&8&*+*.*:*:*<K-( -(^ &(\\#/#:#:#<"'--%-#/#8#8#:	&
 &/)B * *(5*:	*J (5>>91;1G1G1I.I *,'1'E'E'G&2)E'1':':'<	*JN  *)C *ekk5::-FFC3=3I3I3K0J" *,'1'E'E'G&+kk)2):):$4*/./2</@*"
 (2':':'<	*J  *)rA   )r   r   r   rR   r(  r)  r*  )   r   r   r   rR   r(  r)  r*  r+  )
r,  r-  r   other_scaleother_zpbinary_post_opr   unary_post_opunary_post_op_argsunary_post_op_algorithmrr   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r   rA   )r   r(  r)  c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zDregister_onednn_fusion_ops.<locals>.qlinear_binary.<locals>.<lambda>  r6  rA   rL  qlinear_binaryr   )-r\   r]   r   r%   r   r   r   r9   r   r'   r5   r6   r   r   r9  r4   r   r   r   r;  r<  r*   r(   r&   r8   r   r   r   r^   r_   r   
get_layoutsizer=  r>  r   r?  r@   r   rf   r   rb   r   r   r   )#rN   r   r   r   r    r   rB  rr   r'  r   r   x2_scalex2_zpr   r   r   r   r   rU   rp   x2_sizer@  rs   rt   r   rX   ru   r   rw   r   r:   r;   rH  r<   aten_mkldnn_qlinear_binarys#    `` ` `````    ```           @@@@@r-   rW  z2register_onednn_fusion_ops.<locals>.qlinear_binary  s=   6 ZZ\FkkmGv;#g,...6{Q;%#7R,-"r72;/0gr||4G}---''55LL>Y 6  !>7+;+;+=>> #7B/G7++-.&8T:TT8|ww22LL%++6V 3  |ww22LL%++6V 3  dBLL1DzS(((ww22LLU[[9 3  
 OOLLN~~5;;.:88>!!4  gg//@CCEKKPww22LLEKK@t}} 3  e#MMNN$  lln(GG||~5
 &b,7<<>\9 ]9 ||~H-1-=)4J*,G##v'?'?&3:}b<40FA}b @@F)) DOO-223q8"@@F)) (():):4==?)KL))$--/: .faG ww001G1G1IJH'002H
 1 %	7&!F* F* F*P $//< GT='4L$wbRVW!%T!1)9  < '<5 7|q $9$;!(&2!- ("#.!&",'4,< <%)F6N3.33< GT='4L$wbRVW	
 ! !))+qww/@/@@@@<<<M
 #La . < GT='4D$wb$O+F 6{Q;%#7f&Ks&KV__5Fr5J&KLMrA   zmkl::_mkl_linearrT   packed_worig_wc                d   g }t         j                  st         j                  rMt        |ddg      }t	        | ||      ^ }}} }t        || |      rt        j                  ||| ||gdddg       t        |      dk(  s
t               r'|j                  j                  | ||f|d |             |j                         t        j                  j                  v sJ |j                         t        j                  j                  v sJ d d	 d
}	t!        d|| ||g||	      }
|t#        |
|      }
|
S )Nr   r   rT   TrR   )rW   r   )r   
batch_sizec                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>3      !2!21::<!@ rA   c                 X    t         j                  j                  | j                            S rI   r   r   s    r-   r   zGregister_onednn_fusion_ops.<locals>.mkl_packed_linear.<locals>.<lambda>4  rc  rA   )r   rR   packed_linearr   )r   r^   r_   r   r   r   r   rf   r]   r   r   r   r&   r   r'   r(   r   r   )rN   r^  r_  rP   ra  rU   rs   r   rt   r   rw   aten_mkl_linears              r-   mkl_packed_linearz5register_onednn_fusion_ops.<locals>.mkl_packed_linear  sI    /1&&&*B*B#*6Aq6#:L29<3/Q< -VQE'33#"&1$(+,a& w<1$(=(?NN',,&16Tj -   ((*agg.?.????(AGG,=,==== A@! %>#&)"/% = ^FrA   rI   )&r6   _C_has_mkldnn r~   r   r   mkldnn_linear_pointwiseLinearUnaryrl   binaryLinearBinaryonednnqlinear_pointwiseQLinearPointwisePT2EQLinearPointwiseBinaryPT2E_convolution_pointwise_convolution_pointwise_ _convolution_transpose_pointwiser   r   defaultqconv_pointwiser   r   boolrh   r   qconv2d_pointwisebinary_tensorhas_mklmkl_mkl_linearMKLPackedLinearr   r   r   )cpu_needs_realized_inputsr   r   r   r   r   r   r   r   r   r8  rW  rg  rf  r   r   r]  rA  r~   s                @@@@@@r-   register_onednn_fusion_opsr     s   xx#5II..'!$0077	$
  %7II..55'!$1188	%
! %7II..'!$99@@	%
! &8II..55'!$??FF	&
" II33II44II==II..!!))II,,%
! 
599++BB	C			 	 
D	6 
599++BBII	J			 	 		 
K	B 
599++CCJJ	K			 	 		 
L	B 
599++==	> ?	?	?	 ?	 
??	B 
599++==DD	EQU;	;	&;	+4;	9B;	 
F;	z 
599++LL	M			 	 
N	: 
40088	9&	&	&	 &	 	&	
 &	 &	 &	 &	 c&	 &	 &	 &	 &	  &	 &	  !&	 
:&	P 
599++;;QU	V1	1	 %	1	
 1	 1	 1	 
W1	f 
II..554

 
II..<<RV

F	F	 %	F	
 F	 F	 F	 F	



F	P 
599++==SW	X j	j	 %	j	
 j	 j	 j	 
Yj	X	 
II..554

 
II..<<RV

, '@	@	 %	@	
 @	 @	 @	 @	



@	D
 880		))" %(88??	O &,,UYY]]-F-FGuyy}}889 00#0 "0 I&	0 :0d 	"";<rA   )NNNN)/r#  typingr   r6   torch.utils._pytreeutils_pytreer    torch._inductor.kernel.mm_commonr   rj  r   r   codegen.cpp_gemm_templater   !codegen.cpp_grouped_gemm_templater	   codegen.cpp_utilsr   r   r"  r   r   r   r   r   r   r   select_algorithmr   r   r   r   r   virtualizedr   r   r   Tensortuplery  r@   rM   rh   r|   _inductor_lowering_functionr  r   rA   r-   <module>r     sc      $ $ 4  6 E 8    
 @ ) )+ll+<<+ \\+ ,,	+
 \\+ 4x556+\.%).. . x 	.
 H. x . ". .j 
==I= I=@ 59  1ArA   