
    rhr              
          d dl Z d dlmZmZ d dlmZ  e j                  ed      Z e j                  ed      Z e       rd dl	Z	d dl	m
Z e	j                  	 	 dEd       Ze	j                  	 	 dEd	       Ze	j                  	 	 	 	 dFd
       Ze	j                  	 	 	 	 dFd       Z e	j"                   e	j$                  ddidd       e	j$                  ddidd       e	j$                  ddidd       e	j$                  ddidd      gg       e	j                  	 	 dEd              Z e	j"                   e	j$                  ddidd       e	j$                  ddidd       e	j$                  ddidd       e	j$                  ddidd      gg       e	j                  	 	 dEd              Z e	j"                   e	j$                  ddidd      gg       e	j                  	 	 dEd              Z e	j"                   e	j$                  ddddd       e	j$                  ddddd       e	j$                  ddddd       e	j$                  ddddd      gg       e	j                  	 	 	 	 dGd              Zd Z e	j"                   e	j$                  ddidd       e	j$                  ddidd      gg dddei      e	j                  	 	 dEd              Ze	j                  	 	 dEd        Ze	j                  	 	 dEd!       Ze	j                  	 	 	 	 dGd"       Ze	j                  	 	 dEd#       Ze	j                  	 	 	 	 dGd$       Ze	j                  	 	 dEd%       Ze	j                  	 	 dEd&       Ze	j                  	 	 dEd'       Z e	j                  	 	 dEd(       Z!e	j                  d)        Z"e	j                  	 	 	 	 dHd*       Z#e	j                  	 	 	 	 dId+       Z$e	j                  	 	 	 	 dJd,       Z%e	j                  	 	 	 	 dJd-       Z&e	j                  dejN                  fd.       Z(e	j                  dejN                  fd/       Z)d d0l*m+Z+m,Z, e	j                  	 	 dEd1       Z-e	j                  	 	 dEd2       Z.e	j                  	 	 dEd3       Z/e	j                  	 	 dEd4       Z0e	j                  	 	 dEd5       Z1 e	j"                   e	j$                  ddddd6dd       e	j$                  ddd7dd6dd      gg d8      e	j                  d9ejN                  d:ejN                  d;ejN                  d<ejN                  fd=              Z2e	j                  dejN                  fd>       Z3e	j                  dejN                  fd?       Z4e	j                  dejN                  fd@       Z5e	j                  dejN                  fdA       Z6	 dKdBe7e8   dCe9fdDZ:yy)L    N)HAS_CUDAHAS_GPU)
has_tritonzrequires cudazrequires gpu)language
BLOCK_SIZEc                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y Nr   axismasktl
program_idarangeloadstorein_ptr0in_ptr1out_ptr
n_elementsr   pidblock_startoffsetsr   xyoutputs               w/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/testing/_internal/triton_utils.py
add_kernelr            mm#J&		!Z 88#GGGg%D1GGGg%D1Q
7"F6    c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z
  }t        j                  ||z   ||       y r	   r   r   s               r   
sub_kernelr$   "   r!   r"   c                 .   t        j                  d      }||z  }|t        j                  d|      z   }||k  }	t        j                  | |z   |	      }
|dk(  r t        j                  ||z   |	      }|
|z   }n|
}t        j                  ||z   ||	       y Nr   r
   r   twor   )r   r   r   r   ARGS_PASSEDr   r   r   r   r   r   r   r   s                r   add_kernel_with_optional_paramr)   3   s     mm#J&		!Z 88#GGGg%D1%')5AUFF
7"F6r"   c                 :   t        j                  d      }||z  }|t        j                  d|      z   }	|	|k  }
t        j                  | |	|z  z   |
      }|dk(  r t        j                  ||	z   |
      }||z   }n|}t        j                  ||	|z  z   ||
       y r&   r   )r   r   r   r   strider(   r   r   r   r   r   r   r   r   s                 r   -add_kernel_with_none_param_and_equal_to_1_argr,   H   s     mm#J&		!Z 88#GGGg..T:%')5AUFF
7V++V$?r"            )
num_stages	num_warps   @   )configskeyc                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y r	   r   r   s               r   add_kernel_autotunedr7   ^       " mm#J&		!Z 88#GGGg%D1GGGg%D1Q
7"F6r"   c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z
  }t        j                  ||z   ||       y r	   r   r   s               r   sub_kernel_autotunedr:   x   r8   r"         c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y r	   r   )r   r   r   r   r   r   r   r   r   r   r   r   s               r   &add_kernel_autotuned_weird_param_orderr>      s      mm#J&		!Z 88#GGGg%D1GGGg%D1Q
7"F6r"   )BLOCK_SIZE_XBLOCK_SIZE_Yc                    t        j                  d      |z  }|t        j                  d|      d d d f   z   }||k  }	t        j                  d      |z  }
|
t        j                  d|      d d d f   z   }||k  }|}|}t        j                  | |||z  z   z   |	|z        }t        j                  | |||z  z   z   |	|z        }||z   }t        j                  ||||z  z   z   ||	|z         y )Nr      r   )r   r   r   
x_elements
y_elementsr?   r@   xoffsetxindexxmaskyoffsetyindexymaskx1y0tmp0tmp1tmp2s                     r   add_kernel_2d_autotunedrP      s    6 --"\1299Q5ag>>#--"\1299Q5dAg>>#www"
R"8955=Iwww"
R"8955=Id{
B*r/23T55=Ir"   c                     | S )N )r4   ___s      r   _dummy_early_config_prunerU      s    r"   
      early_config_prune)r4   r5   warmuprepprune_configs_byc                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y r	   r   r   s               r   *add_kernel_autotuned_with_unsupported_argsr]      s    $ mm#J&		!Z 88#GGGg%D1GGGg%D1Q
7"F6r"   c                 $   t        j                  d      }||z  }|t        j                  d|      z   }||k  }	t        j                  | |z   |	      }
t        j                  ||z   |	      }|
|z   |z  }t        j                  ||z   ||	       y r	   r   )r   r   r   r   scaling_factorr   r   r   r   r   r   r   r   s                r   add_kernel_with_scalingr`      s     mm#J&		!Z 88#GGGg%D1GGGg%D1a%>)
7"F6r"   c                    t        j                  d      }||z  }t        j                  | |g|gt         j                        }t        j                  ||g|gt         j                        }||z   }t        j                  |||g       y Nr   r
   r   r   _experimental_descriptor_loadfloat32_experimental_descriptor_store	in_desc_ptr0in_desc_ptr1out_desc_ptrr   r   offsetabr   s	            r   add_kernel_with_tma_1d_old_apirn     s     mm#z!,,HLJJ	
 ,,HLJJ	
 Q
))H	
r"   c                 T   t        j                  d      }t        j                  d      }||z  }||z  }t        j                  | ||g||gt         j                        }	t        j                  |||g||gt         j                        }
|	|
z   }t        j                  ||||g       y Nr   r
   rB   rc   rh   ri   rj   r?   r@   pid_xpid_yoffset_xoffset_yr   r   r   s               r   add_kernel_with_tma_2d_old_apirv   "  s     1%1%<'<',,x <(JJ	
 ,,x <(JJ	
 Q
))x 	
r"   c                     t        j                  d      }||z  }t        j                  | |g      }t        j                  ||g      }||z   }t        j                  ||g|       y rb   r   r   load_tensor_descriptorstore_tensor_descriptorrg   s	            r   add_kernel_with_tma_1d_new_apir{   D  sr     mm#z!%%H
 %%H

 Q
""H	
r"   c                    t        j                  d      }t        j                  d      }||z  }||z  }t        j                  | ||g      }	t        j                  |||g      }
|	|
z   }t        j                  |||g|       y rp   rx   rq   s               r   add_kernel_with_tma_2d_new_apir}   _  s     1%1%<'<'%%x 
 %%x 

 Q
""x 	
r"   c                    |}|dz   }|dz   }	t         j                  j                  j                  || ||g||g| j                  j
                         t         j                  j                  j                  ||||g||g|j                  j
                         t         j                  j                  j                  |	|||g||g|j                  j
                         t         j                  j                  j                  |       t         j                  j                  j                  |       t         j                  j                  j                  |	       t        j                  d      }
t        j                  d      }|
|z  }||z  }t        j                  |||g||gt         j                        }t        j                  |||g||gt         j                        }||z   }t        j                  |	|||g       y )Nr-      )desc_ptrglobal_address	load_sizeglobal_size
element_tyr   r
   rB   )r   extracuda&experimental_device_tensormap_create2ddtyper   )experimental_tensormap_fenceproxy_acquirer   rd   re   rf   )a_ptrb_ptrc_ptrmn	workspacer   
a_desc_ptr
b_desc_ptr
c_desc_ptrrr   rs   rt   ru   rl   rm   r   s                    r    add_kernel_on_device_tma_old_apir   }  s    
_
_

<< !:.A{{-- 	= 	
 	<< !:.A{{-- 	= 	
 	<< !:.A{{-- 	= 	
 	??
K
??
K
??
K1%1%:%:% ,,x $JJ	
 ,,x $JJ	
 Q 	))x 	
r"   c                    t        j                  | ||g|dg||g      }t        j                  |||g|dg||g      }t        j                  |||g|dg||g      }	t        j                  d      }
t        j                  d      }|
|z  }||z  }t        j                  |||g      }t        j                  |||g      }||z   }t        j                  |	||g|       y )NrB   )baseshapestridesblock_shaper   r
   )r   make_tensor_descriptorr   ry   rz   )r   r   r   r   r   r   r   a_descb_descc_descrr   rs   rt   ru   rl   rm   r   s                    r    add_kernel_on_device_tma_new_apir     s    **a&F#Z0	
 **a&F#Z0	
 **a&F#Z0	
 1%1%:%:% %%x 
 %%x 
 Q 	""x 	
r"   c                     t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }d|z  }	t        j                  ||z   |	|       y Nr   r
   r   r<   r   )
r   r   r   r   r   r   r   r   r   r   s
             r   mul2_kernelr     sn     mm#J&		!Z 88#GGGg%D1Q
7"F6r"   c                     t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }d|z  }t        j                  | |z   ||       y r   r   )	ptrr   r   r   r   r   r   r   r   s	            r   mul2_inplace_kernelr     sl     mm#J&		!Z 88#GGC'M-Q
wT2r"   c                 6    t        j                  | dk\  | d      S )Nr   )r   where)r   s    r   	zero_negsr     s    xxQ1%%r"   c                 2   t        j                  d      }||z  }|t        j                  d|      z   }||k  }|dk(  rt        | ||       n|dk(  rt	        | | |||       t        j
                  | |z   |      }	t        j                  ||z   |	|       y )Nr   r
   r   )r   r    r   )r   r   r   r   r    r   r   )
r   r   r   r   
ACTIVATIONr   r   r   r   r   s
             r   indirection_kernelr     s     mm#J&		!Z 88#..
K<'w*TGGGg%D1
7"AD1r"   c                    t        j                  d      }t        j                  d      }||z  }||z  }	|t        j                  d|      z   }
|	t        j                  d|      z   }|d d d f   |z  |
d d d f   z   }|d d d f   |z  |
d d d f   z   }t        j                  | |z         }t        j                  ||z   |dz         y )Nr   r
   rB   g       @r   )in_ptrr   in_y_strideout_y_strideX_BLOCK_SIZEY_BLOCK_SIZExidyidx_starty_start	x_offsets	y_offsetssrc_offsetsdst_offsetssrcs                  r   double_strided_kernelr   ,  s     mm#mm#$$bii<88	bii<88	4(;6479KK4(<7)D!G:LLggf{*+
;&c	2r"   c                    t        j                  | t        j                  d|      z         }t        j                  |t        j                  d|      z         }t        j                  |g|t         j                        }t        j
                  dd|||gt         j                  dd      }t        j                  |t        j                  d|      z   |       y )Nr   shf.l.wrap.b32 $0, $1, $2, $3;
=r,r, r, rTrB   r   is_purepackr   r   r   fullint32inline_asm_elementwiser   	XYZr   BLOCKr   r   szs	            r   inline_asm_kernel_is_pure_truer   @  s     GGA		!U++,GGA		!U++,GGUGQ)%%,1I((
 	RYYq%((!,r"   c                    t        j                  | t        j                  d|      z         }t        j                  |t        j                  d|      z         }t        j                  |g|t         j                        }t        j
                  dd|||gt         j                  dd      }t        j                  |t        j                  d|      z   |       y )Nr   r   r   FrB   r   r   r   s	            r   inline_asm_kernel_is_pure_falser   Q  s     GGA		!U++,GGA		!U++,GGUGQ)%%,1I((
 	RYYq%((!,r"   c           
         t        j                  d      }||z  }t        j                  t        j                  | |gdg|g|gdg      dg      }t        j                  t        j                  ||gdg|g|gdg      dg      }||z   }	t        j                  t        j                  ||gdg|g|gdg      |	dg       y Nr   r
   rB   )r   r   r   r   r   order)boundary_checkr   r   r   make_block_ptrr   )
x_ptry_ptr
output_ptrr   r   r   r   r   r   r   s
             r   add_kernel_with_block_ptrr   b  s     mm#J&GG!l$'Lc 3

 GG!l$'Lc 3

 Q
!l$'Lc 3	
r"   c                 ,   t        j                  d      }||z  }t        j                  t        j                  | |dgddg|dg|dgddg      dg      }|}t        j                  t        j                  ||dgddg|dg|dgddg      |dg       y r   r   )r   r   r   r   r   r   r   r   s           r   kernel_with_block_ptr_2dr     s     mm#J&GG!1oA$a('O!f 3

 
!1oA$a('O!f 3	
r"   )r   r   c                     t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        | |z   |      }	t        ||z   |      }
|	|
z   }t	        ||z   ||       y r	   r   r   s               r   add_kernel_with_importr     sw     mm#J&		!Z 88#7".7".Qgd3r"   c                 Z   t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
t        j                  d      dk(  r|	|
z   }n|	|
z  }t        j                  ||z   ||       y r	   r   r   s               r   cond_op_kernelr     s     mm#J&		!Z 88#GGGg%D1GGGg%D1==q UFUF
7"F6r"   c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y r	   )r   r   r   r   
atomic_addr   s               r   atomic_add_kernelr     s     mm#J&		!Z 88#GGGg%D1GGGg%D1Q
g'd;r"   c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
t	        d      D ]"  }|	|
z   }t        j
                  ||z   ||       $ d}|dkD  r,|dz  }|	|
z   }t        j
                  ||z   ||       |dkD  r+y y )Nr   r
   r   r<   rB   )r   r   r   r   ranger   )r   r   r   r   r   r   r   r   r   r   r   ir   s                r   add_4_times_kernelr     s     mm#J&		!Z 88#GGGg%D1GGGg%D1q 	;AUFHHWw&T:	; !eFAUFHHWw&T: !er"   c                    t        j                  d      }||z  }|t        j                  d|      z   }||k  }t        j                  | |z   |      }	t        j                  ||z   |      }
|	|
z   }t        j                  ||z   ||       y r	   r   )r   r   r   r   r   r   r   r   r   r   r   r   s               r   add_kernel_out_of_order_fn2r     r!   r"   )BLOCK_SIZE_MBLOCK_SIZE_NBLOCK_SIZE_KGROUP_SIZE_M    )M_ptrNKr   r   r   r   c
                    t        j                  d      }
t        j                  |      }|dk(  r|dkD  rd}n|dk(  ry t        j                  ||      }t        j                  ||      }|	|z  }|
|z  }||	z  }t	        ||z
  |	      }||
|z  |z  z   }|
|z  |z  }||z  t        j
                  d|      z   |z  }||z  t        j
                  d|      z   |z  }t        j
                  d|      }| |d d d f   |d d d f   z   z   }||d d d f   |d d d f   z   z   }t        j                  ||ft         j                        }t        dt        j                  ||            D ]s  }t        j                  ||d d d f   |||z  z
  k  d      }t        j                  ||d d d f   |||z  z
  k  d      }t        j                  |||      }||z  }||z  }u |j                  t         j                        }||z  t        j
                  d|      z   }||z  t        j
                  d|      z   }||d d d f   z   |d d d f   z   } |d d d f   |k  |d d d f   |k  z  }!t        j                  | ||!       y )	Nr   r
   r   i   r   g        )r   otherr   )r   r   r   cdivminr   zerosre   r   dottofloat16r   )"r   r   r   r   r   r   r   r   r   r   r   M	num_pid_m	num_pid_nnum_pid_in_groupgroup_idfirst_pid_mgroup_size_mpid_mpid_noffs_amoffs_bnoffs_ka_ptrsb_ptrsaccumulatorkrl   rm   coffs_cmoffs_cnc_ptrsc_masks"                                     r   strange_config_matmul_kernelr    s   N mm#GGEN6lR'A!VGGA|,	GGA|,	')3**-9{2LA&6 6,FG''L8<'"))A|*DDI<'"))A|*DDI1l+'!T'*VD!G_<=&D/GD!G,<<=hhl;2::Nq"''!\23 	#AVD!G_q1|;K7K%KSVWAVAtG_q1|;K7K%KSVWA&&A{3Kl"Fl"F	# NN2::&,&1l)CC,&1l)CCD))GD!G,<<!T'"Q&747+;a+?@
(r"   c                     t        j                  d      }t        j                  d|      ||z  z   }t        j                  |gdt         j                        }t        j
                  | |z   |||k         y)z
        This kernel contains a triple-quote docstring w/ double quotes.
        Make sure that codegen sanitizes the docstring.
        r   r
         ?r   r   Nr   r   r   r   re   r   r   numelr   r   r   oness         r   #kernel_with_docstring_double_quotesr  _  s]     mm#))Az*S:-==ww
|S

;
7"Dw?r"   c                     t        j                  d      }t        j                  d|      ||z  z   }t        j                  |gdt         j                        }t        j
                  | |z   |||k         y)z
        This kernel contains a triple-quote docstring w/ single quotes
        Make sure that codegen sanitizes the docstring.
        To prevent it from being linted to double quotes: """!!!"""
        r   r
   r  r   r   Nr  r  s         r   #kernel_with_docstring_single_quotesr  j  s]     mm#))Az*S:-==ww
|S

;
7"Dw?r"   c                 2   t        j                  d      }t        j                  d|      ||z  z   }t        j                  | |z   ||k        }t        j                  dd|gt         j
                  dd      }t        j                  ||z   |||k         y )	Nr   r
   r   z{
            {
                cos.approx.f32 $0, $1;
                ex2.approx.f32 $0, $0;
            }
                =r, rTrB   asmconstraintsargsr   r   r   r   r   r   r   r   re   r   r   r   r  r   r   r   datacos_pows           r   kernel_inline_asm_double_quotesr!  v  s     mm#))Az*S:-==wwv'go>++ !**
 	7"G'E/Br"   c                 2   t        j                  d      }t        j                  d|      ||z  z   }t        j                  | |z   ||k        }t        j                  dd|gt         j
                  dd      }t        j                  ||z   |||k         y )	Nr   r
   r   z
            {
                // double quotes to pacify the linter """!!!"""
                cos.approx.f32 $0, $1;
                ex2.approx.f32 $0, $0;
            }
                r  TrB   r  r  r  s           r   kernel_inline_asm_single_quotesr#    s     mm#))Az*S:-==wwv'go>++ !**
 	7"G'E/Br"   block_sizesnew_apic           	      :   |r4t         j                  j                  j                  j	                  | |      S t        |      dk(  rZt         j                  j                  j                  | j                         | j                  d      |d   | j                               S t        |      dk(  sJ t         j                  j                  j                  | j                         | j                  d      | j                  d      |d   |d   | j                               S )NrB   r   r<   )tritontoolstensor_descriptorTensorDescriptorfrom_tensorlenexperimental_descriptorcreate_1d_tma_descriptordata_ptrsizeelement_sizecreate_2d_tma_descriptor)tensorr$  r%  s      r   create_tensor_descriptor_shimr4    s     <<11BBNN  ;1$||;;TTOO%KKNN'')	  ;'1,,,||;;TTOO%KKNKKNNN'') r"   )r   tl.constexpr)r(   r5  r   r5  )r?   r5  r@   r5  )r   r5  r   r5  )r   r5  r   r5  )r   r5  r   r5  )T);unittest&torch.testing._internal.inductor_utilsr   r   torch.utils._tritonr   
skipUnlessrequires_cudarequires_gpur'  r   r   jitr    r$   r)   r,   autotuneConfigr7   r:   r>   rP   rU   r]   r`   rn   rv   r{   r}   r   r   r   r   r   r   r   r   r   	constexprr   r   triton.languager   r   r   r   r   r   r   r  r  r  r!  r#  listintboolr4  rR   r"   r   <module>rD     s@    D * $##Ho>"x""7N;<% ZZ7
 #7 7  ZZ7
 #7 7  ZZ7
 $7 #7 7( ZZ@ $@ #@ @* V__FMM<-!qIFMM<-!qIFMM<,aHFMM<,aH	
  ZZ7
 #7 7  V__FMM<-!qIFMM<-!qIFMM<,aHFMM<,aH	
  ZZ7
 #7 7  V__FMM<,aH
 	 ZZ7 #	7 7$ V__FMM!$c:qTU FMM!$c:qTU FMM!#R8QRS FMM!#R8QRS
 " ZZJ %J %J #$J, V__FMM<-!qIFMM<,aH
 .0IJ	 ZZ7
 #7 	7  ZZ7 #7 7" ZZ
 #	
 
< ZZ
 %	

 %
 
B ZZ
 #	
 
4 ZZ
 %	

 %
 
: ZZA
 #A
 A
F ZZ4
 #4
 4
l ZZ7 #	7 7 ZZ3 #3 3 ZZ& & ZZ2 #	2
 #2 2$ ZZ3
 %3 %3 3& ZZ-"-+9- -  ZZ-"-+9- -  ZZ+

 LL+
 +
Z ZZ
 LL	
 
B ,ZZ4
 #4 4  ZZ7
 #7 7& ZZ<
 #< <  ZZ;
 #; ;, ZZ7
 #7 7  V__FMM$&$&$&$%	 	 FMM$'$&$&$%	 	
,  /2 ZZ1) ll1) ll1) ll1) ll1) 341)f ZZ@ @ @ ZZ	@ 	@ 	@ ZZC,.LLC C* ZZC,.LLC C0 9=!#Y15q r"   