
    rhv                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d dlmZm Z  d dl!Z!d dl"Z"d dl#Z"d dl$m%Z% d d	l&m'Z( d d
l)m*Z* d dl+m,Z, d dl-m.Z/ d dl0m1Z1m2Z2m3Z3 d dl4m5Z5m6Z6 ddl7m8Z8m9Z9 ddl:m;Z; ddl<m=Z=m>Z> ddl?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZK ddlLmMZMmNZNmOZOmPZPmQZQmRZR ercd dlSmTZTmUZUmVZV d dl#mWZW ddlXmYZY ddlZm[Z[m\Z\m]Z]m^Z^ ddl_m`Z` ddlambZbmcZcmdZd ddlemfZf  e d      Zgeeec   gebf   Zheief   Zjeeke!j                  f   ZmekZne"j                  j                  eqd      Zr ej                  eq      ZtddZuej                   G d  d!             Zw G d" d#ej$                        Zx G d$ d%e      Zy eEd&'       G d( d)ey             Zz G d* d+      Z{ej                   G d, d-             Z|ej                   G d. d/             Z}ej                   G d0 d1             Z~ej                   G d2 d3             Zej                   G d4 d5             Zeeze|e}ee~f   Zi Zd6ed7<    G d8 d9      Zi Zd:ed;<   i Zd<ed=<   	 	 d	 	 	 	 	 	 	 	 	 	 	 dd>Z G d? d@e      Z	 	 	 	 ddAZ	 	 	 	 	 	 ddBZddCZ	 d		 	 	 	 	 d
dDZddEZej                  ddF       Z	 	 	 	 	 	 	 	 ddGZ	 	 	 	 	 	 ddHZddIZe"j&                  e"j(                  e"j*                  e"j(                  ie"j,                  e"j.                  e"j0                  e"j2                  e"j4                  e"j6                  e"j8                  e"j:                  e"j<                  e"j>                  e"j@                  fD  ci c]  } | |  c} ZdJedK<   	 	 	 	 	 	 	 	 ddLZ	 	 	 	 	 	 	 	 ddNZ G dO dP      Z G dQ dRe/      Z. G dS dT      Z ejL                  dUejN                  V      ZddWZ G dX dYe=eeNe         Zej                   G dZ d[             Z edi d\ ee%jZ                  d] d^_      d` ee%jZ                  da db dcd      de ee%jZ                  df dg dhd      di ee%jZ                  dj dk dld      dm ee%jZ                  dn do dpd      dq ee%jZ                  dr ds dqt      du ee%jZ                  dv dw dxd      dy ee%jZ                  dz d{ d| dy}      d~ ee%jZ                  d d~_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d d d      d ee%jZ                  d d dt      d ee%jZ                  d d dd      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d dd      d ee%jZ                  d d dd      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d_      d ee%jZ                  d d¬_      d ee%jZ                  dĄ dŬ_      d ee%jZ                  dǄ dȬ_      d ee%jZ                  dʄ dˬ_      d ee%jZ                  d̈́ dά_      d ee%jZ                  dЄ dѬ_      d ee%jZ                  dӄ dԬ_      d ee%jZ                  dք d׬_      d ee%jZ                  dل dڬ_      d ee%jZ                  d܄ dݬ_      d ee%jZ                  d߄ d_      Zded<   ddZ G d deA      Z G d deD      Z G d de      Zej                   G d d             Z G d d      Z e       Z G d d      Z G d d      Z e dek      Z e dMee      Zeree"jv                  ePeeeedf   f   f   Z G d deeef         Z G d d      Z G d deee         Zej                   G d d             Zej                  dd       Z G d d       Z G d de>      Zyc c} w (      )annotationsN)ABCabstractmethod)autoEnum)chain)	AnyCallablecastClassVarGeneric
NamedTupleOptionalTYPE_CHECKINGUnion)SelfTypeVar)ELEMENTWISE_TYPE_PROMOTION_KIND)_pytree)
OrderedSet)int_oo)PythonPrinter)free_symbol_is_typesymbol_is_typeSymT)bound_sympyValueRanges   )configmetrics)DtypePropagationOpsHandler)BasicMathOpsMixinDefaultHandler)boolean_opsDeferredLineBasegenerate_assertget_current_backendIndentedBufferir_dataclass
ScopedDict	sympy_dotsympy_index_symbol
sympy_substriton_typeunique)ops
OpsHandlerOpsValueReductionType	StoreModeV)IteratorMutableMappingSequence)GraphModule)CustomGraphModulePass)BufferChoiceCallerFixedLayoutIRNodeLoopBody)BaseScheduling	SchedulerSchedulerNode   PythonWrapperCodegen_Tschedulec                x    t         j                  t        j                        rt         j	                  d|        y y )NzData type propagation: %s)schedule_logisEnabledForloggingDEBUGdebug)msgs    q/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/codegen/common.pydata_type_loggerrQ   W   s*      /6< 0    c                  Z    e Zd ZU dZded<   ded<   ddZedd       ZddZedd	       Z	y
)FileBackedGraphModulez
    Output of FX wrapper codegen. Exposes the same methods as ModuleType, but these
    map back to a GraphModule instead of Python source.
    r9   gmzCallable[..., Any]compiled_fnc                &   t        j                  ddd      | _         t        j                  t        j
                  | j                   j                         | j                   5 }|j                  | j                         d d d        y # 1 sw Y   y xY w)Nzw+z.pyF)modesuffixdelete)	tempfileNamedTemporaryFileatexitregisterosremovenamewritevalue)selffs     rP   __post_init__z#FileBackedGraphModule.__post_init__f   si     !33eE
 			4==#5#56]] 	 aGGDJJ	  	  	 s   "BBc                .    | j                   j                  S N)r[   ra   rd   s    rP   __file__zFileBackedGraphModule.__file__p   s    }}!!!rR   c                      | j                   | S rh   )rV   rd   argss     rP   callzFileBackedGraphModule.callt   s    t&&rR   c                .    | j                   j                  S rh   )rU   coderi   s    rP   rc   zFileBackedGraphModule.valuew   s    ww||rR   NreturnNonerr   str)rm   	list[Any]rr   r	   )
__name__
__module____qualname____doc____annotations__rf   propertyrj   rn   rc    rR   rP   rT   rT   \   sF    
 	O##  " "'  rR   rT   c                  <    e Zd ZdZdZdZedd       Zedd       Zy)	WorkspaceZeroModer   rD   r   c                    | |k(  s|t         j                  k(  r| S | t         j                  k(  r|S t        d| d|d      )NzWorkspaceZeroMode.combine(, ))r   UNINITIALIZEDNotImplementedErrorabs     rP   combinezWorkspaceZeroMode.combine   sK    6Q+999H!///H!$>qe2aU!"LMMrR   c                F    | rt         j                  S t         j                  S rh   )r   ZERO_ON_CALLr   )	zero_fills    rP   	from_boolzWorkspaceZeroMode.from_bool   s    $111 ...rR   N)r   r   r   r   rr   r   )r   boolrr   r   )	rw   rx   ry   r   r   ZERO_PER_GRAPHstaticmethodr   r   r}   rR   rP   r   r   |   s9    MLNN N / /rR   r   c                  4    e Zd ZdZedd       Zedd       Zy)CodegenSymbolzP
    An IR object possibly corresponding to a variable in the wrapper code.
    c                     y rh   r}   ri   s    rP   get_namezCodegenSymbol.get_name       rR   c                     y rh   r}   ri   s    rP   get_examplezCodegenSymbol.get_example   r   rR   Nrt   rr   z!Union[torch.Tensor, sympy.Symbol])rw   rx   ry   rz   r   r   r   r}   rR   rP   r   r      s/        rR   r   T)frozenc                     e Zd ZU dZded<   ded<   ded<   ded	<   d
Zded<   ej                  Zded<   e	ddd       Z
e	dd       Ze	d d       Ze	d d       Zd!dZeZd"dZd#dZd$dZed$d       ZeZeZeZd%dZd&dZd&dZd'dZd(dZy))WorkspaceArga2  A temporary buffer used for a single kernel, then discarded.

    Not registered as a traditional buffer since there are no users,
    so it would be dead code eliminated.

    Args:
        nbytes: The size of the buffer in bytes.
        zero_fill: Whether the buffer should be initialized to zero.

    
sympy.Exprcountr   	zero_modetorch.devicedeviceru   
outer_namews_ptr
inner_nametorch.dtypedtypec                P    |  t        t        j                  j                         S rh   )nextr5   graphworkspace_id)prefixs    rP   unique_namezWorkspaceArg.unique_name   s!    $qww334566rR   c                    | j                   |j                   k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S rh   )r   r   r   r   s     rP   can_joinzWorkspaceArg.can_join   s@     LLALL(XQWW-?XAHHPQPXPXDX	
rR   c                    t        | j                  |j                  z   t        j                  | j                  |j                        | j
                  | j                  | j                  | j                        S N)r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   r   s     rP   joinzWorkspaceArg.join   sS    ''AGG#'//Q[[I''88||||
 	
rR   c                   | j                   |j                   k(  r2| j                  |j                  k(  r| j                  |j                  k(  sJ t        t	        j
                  | j                  |j                        t        j                  | j                  |j                        | j                   | j                  | j                  | j                        S r   )r   r   r   r   sympyMaxr   r   r   r   r   r   s     rP   maximumzWorkspaceArg.maximum   s     GGqww188qxx#7ALLALL<X	
X))AGGQWW-'//Q[[I''88||||
 	
rR   c                    | j                   S rh   r   ri   s    rP   
get_devicezWorkspaceArg.get_device   s    {{rR   c                    | j                   S rh   r   ri   s    rP   	get_dtypezWorkspaceArg.get_dtype   s    zzrR   c                >    | j                         j                         S rh   )
get_layoutr   ri   s    rP   r   zWorkspaceArg.get_example   s     ,,..rR   c                f    ddl m}  || j                  | j                  | j                  gdg      S )Nr   )r=   rD   )r   r   sizestride)irr=   r   r   r   )rd   r=   s     rP   r   zWorkspaceArg.get_layout   s.    $;;****3	
 	
rR   c                "    | j                         S rh   )r   ri   s    rP   layoutzWorkspaceArg.layout   s      rR   c                6    t         j                  j                  S rh   )r   SZerori   s    rP   
get_offsetzWorkspaceArg.get_offset   s    ww||rR   c                    | j                   gS rh   )r   ri   s    rP   get_sizezWorkspaceArg.get_size   s    

|rR   c                8    t         j                  j                  gS rh   )r   r   Oneri   s    rP   
get_stridezWorkspaceArg.get_stride   s    }rR   c                    | j                   S rh   )r   ri   s    rP   r   zWorkspaceArg.get_name   s    rR   c                    g S rh   r}   ri   s    rP   get_inputs_that_alias_outputz)WorkspaceArg.get_inputs_that_alias_output   s    	rR   N)
workspace_)r   ru   rr   ru   )r   r   r   r   rr   r   )r   r   r   r   rr   r   )rr   r   )rr   r   r   )rr   r=   )rr   r   )rr   list[sympy.Expr]rt   )rr   	list[str])rw   rx   ry   rz   r{   r   torchuint8r   r   r   r   r   r   r   get_device_or_errorr   r   r   r|   r   get_output_specmaybe_get_output_specmaybe_get_layoutr   r   r   r   r   r}   rR   rP   r   r      s    	   OJE;$7 7 
 

 
 
 
 
 %/
 ! ! !O&!rR   r   c                      e Zd ZddZddZy)TritonScratchWorkspacec                     || _         || _        y rh   )r   _generate_dtype_str)rd   r   generate_dtype_strs      rP   __init__zTritonScratchWorkspace.__init__  s    	#5 rR   c                "    | j                         S rh   )r   ri   s    rP   r   z)TritonScratchWorkspace.generate_dtype_str  s    ''))rR   N)r   intr   Callable[..., str]rt   )rw   rx   ry   r   r   r}   rR   rP   r   r     s    6*rR   r   c                  p    e Zd ZU ded<   ded<   ded<   ej
                  j                  Zded<   dZd	ed
<   y)	TensorArgru   ra   bufferr   r   r   offsetNOptional[str]alias_of)	rw   rx   ry   r{   r   r   r   r   r   r}   rR   rP   r   r     s.    
IKFJ%"Hm"rR   r   c                  4    e Zd ZU ded<   ded<   edd       Zy)SizeArgru   ra   r   exprc                     y rh   r}   ri   s    rP   r   zSizeArg.alias_of  s    rR   Nrr   r   )rw   rx   ry   r{   r|   r   r}   rR   rP   r   r     s    
I
 rR   r   c                      e Zd ZU ded<   y)ConstexprArgru   ra   Nrw   rx   ry   r{   r}   rR   rP   r   r     s    
IrR   r   c                  6    e Zd ZU ded<   ded<   ded<   ded<   y)	TMADescriptorArgru   ra   api_typezOptional[list[sympy.Expr]]block_shapeOptional[torch.dtype]r   Nr   r}   rR   rP   r   r   $  s    
IM++  rR   r   c                  0    e Zd ZU ded<   ded<   dZded<   y)DeviceCodegenSchedulingConstructor
schedulingWrapperConstructorwrapper_codegenNOptional[WrapperConstructor]cpp_wrapper_codegen)rw   rx   ry   r{   r   r}   rR   rP   r   r   ,  s    %%''8<5<rR   r   zdict[str, DeviceCodegen]device_codegensc                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZddZddZddZ	 	 	 	 	 	 ddZy)DeviceOpOverridesc                    t         rh   r   rd   ra   s     rP   import_get_raw_stream_asz*DeviceOpOverrides.import_get_raw_stream_as9      !!rR   c                    t         rh   r   rd   
device_idxs     rP   
set_devicezDeviceOpOverrides.set_device<  r  rR   c                    t         rh   r   ri   s    rP   synchronizezDeviceOpOverrides.synchronize?  r  rR   c                    t         rh   r   r  s     rP   device_guardzDeviceOpOverrides.device_guardB  r  rR   c                    t         rh   r   ri   s    rP   cpp_device_guardz"DeviceOpOverrides.cpp_device_guardE  r  rR   c                    t         rh   r   ri   s    rP   cpp_aoti_device_guardz'DeviceOpOverrides.cpp_aoti_device_guardH  r  rR   c                    t         rh   r   ri   s    rP   cpp_stream_guardz"DeviceOpOverrides.cpp_stream_guardK  r  rR   c                    t         rh   r   ri   s    rP   cpp_aoti_stream_guardz'DeviceOpOverrides.cpp_aoti_stream_guardN  r  rR   c                    t         rh   r   ri   s    rP   cpp_getStreamFromExternalz+DeviceOpOverrides.cpp_getStreamFromExternalQ  r  rR   c                    t         rh   r   ri   s    rP   kernel_headerzDeviceOpOverrides.kernel_headerT  r  rR   c                    t         rh   r   ri   s    rP   kernel_driverzDeviceOpOverrides.kernel_driverW  r  rR   c                    t         rh   r   ri   s    rP   cpp_stream_typez!DeviceOpOverrides.cpp_stream_typeZ  r  rR   c                    t         rh   r   ri   s    rP   aoti_get_streamz!DeviceOpOverrides.aoti_get_stream]  r  rR   c                    t         rh   r   ri   s    rP   cpp_kernel_typez!DeviceOpOverrides.cpp_kernel_type`  r  rR   c                    t         rh   r   ri   s    rP   cpp_device_ptrz DeviceOpOverrides.cpp_device_ptrc  r  rR   c                    t         rh   r   ri   s    rP   tma_descriptor_helpersz(DeviceOpOverrides.tma_descriptor_helpersf  r  rR   c                    t         rh   r   )rd   idx	workspaces      rP   cpp_global_scratchz$DeviceOpOverrides.cpp_global_scratchi  s
     "!rR   Nra   ru   rr   ru   )r  r   rr   ru   rt   )r$  r   r%  r   rr   zOptional[tuple[list[str], str]])rw   rx   ry   r  r  r  r
  r  r  r  r  r  r  r  r  r  r  r   r"  r&  r}   rR   rP   r   r   8  so    """"""""""""""""""#9"	("rR   r   zdict[str, DeviceOpOverrides]device_op_overrides_dictz*dict[str, Optional[CustomGraphModulePass]]custom_backend_passesc                >    t        |||      t        | <   |t        | <   y rh   )r   r   r)  )r   device_schedulingdevice_wrapper_codegendevice_cpp_wrapper_codegendevice_custom_passs        rP   register_backend_for_devicer/    s)     ,13MOF %7&!rR   c                      e Zd Z e       Z e       Z e       Z e       Z e       Z e       Z	 e       Z
 e       Z e       Z e       Zy)BackendFeatureN)rw   rx   ry   r   FOREACH	BUCKETIZEINPLACE_BUFFERSMASKED_SCATTER_WITH_INDEXSCANSORTTUPLE_REDUCTIONPREFER_STORE_LOOP_ORDERTRITON_TEMPLATESREDUCE_TO_SINGLE_ELEMENTr}   rR   rP   r1  r1    sL    fGIfO $6D6DfO"fv#vrR   r1  c                :   | 
t               S t                t        | t        j                        r| j
                  }n7t        | t              sJ t        |              | }t        j                  |      } t        |      }|sJ  |d       }|j                  |       S rh   )	r   init_backend_registration
isinstancer   r   typeru   get_scheduling_for_deviceget_backend_features)r   device_typescheduling_ctorr   s       rP   rA  rA    s     ~|&%,,'kk&#&4V4&k*/<O? &J**622rR   c                @    t        |t              sJ |t        |       v S )zSee also V.graph.has_feature)r>  r1  rA  )r   features     rP   has_backend_featurerF    s%     g~...*6222rR   c                <    | t         v rt         |    j                  S d S rh   )r   r   r   s    rP   r@  r@    s     17?1J?6"--TPTTrR   c                Z    | t         v r#t         |    }|r|j                  S |j                  S y rh   )r   r   r   )r   cpp_wrapperwrapper_codegen_objs      rP   get_wrapper_codegen_for_devicerK    sA      -<V-D   33	
 %44	

 rR   c                (    | t         v r	t         |    S d S rh   )r)  r   s    rP   "get_custom_backend_pass_for_devicerM    s    ,26K,K (UQUUrR   c                    ddl m}  ddlm} ddlm} ddlm} ddlm	} ddl
m} ddlm} dd	lm} dd
lm} ddlm}	 t)        d      3| ||dt+        dfd|	t,        j.                  j0                  r|n|       t)        d      ||dt+        dfd|	|       t)        d      t+        d||	|       t)        d      t+        d||	|       t2        j4                  j7                         }
|
dk7  rCt)        |
      7ddlm} 	  |d      } |d      } |d      }|r|r|rt+        |
|||       y y y y y y # t<        $ r Y y w xY w)NrD   )CppScheduling)CppWrapperCpu)CppWrapperCpuArrayRef)CppWrapperGpu)CppWrapperMps)CUDACombinedScheduling)HalideScheduling)MetalScheduling)TritonSchedulingrE   cpu)cpphalidetritonc                6     t         j                     |       S rh   )r   cpu_backend)r   cpu_backendss    rP   <lambda>z+init_backend_registration.<locals>.<lambda>  s    ?|F,>,>?
K rR   cuda)r[  rZ  c                6     t         j                     |       S rh   )r   cuda_backend)r   cuda_backendss    rP   r_  z+init_backend_registration.<locals>.<lambda>  s    A}V-@-@A*M rR   xpumpsprivateuseoner   )_get_custom_mod_func
SchedulingrF   CppWrapperCodegen)rY  rO  cpp_wrapper_cpurP  cpp_wrapper_cpu_array_refrQ  cpp_wrapper_gpurR  cpp_wrapper_mpsrS  cuda_combined_schedulingrT  rZ  rU  re  rV  r[  rW  wrapperrF   r@  r/  r   aot_inductorallow_stack_allocationr   _C_get_privateuse1_backend_name torch.utils.backend_registrationrg  RuntimeError)rO  rP  rQ  rR  rS  rT  rU  rV  rW  rF   private_backendrg  r+  r   r   r^  rc  s                  @@rP   r=  r=    sp   ".@..@($(- '/ &&

 	$K ""99 "	
 !(0 -&
 	$M 		
 !'/# 		
 !'/# 		
 hh<<>O?*%o6>I	 4\ B23IJO"67J"K _9L+#%#'	 :M_  ? 	+   		s   ,D7 7	EEc                L    ddl m} g | t        ||j                  |            S )Nr   )FlexibleLayout)r   rx  r+   contiguous_strides)index
index_varssizesrx  s       rP   index_prevent_reorderingr}  !  s,    
 $ UUTIj.*K*KE*RSTTrR   c                    |t         | <   y rh   )r(  )r   device_op_overridess     rP   register_device_op_overridesr  ,  s     (;V$rR   c                    t        | t              sJ t        |              t        sddlm}m} ddlm} ddl	m} t        |    S )NrD   )cpu_device_op_overridesmps_device_op_overrides)r  )
r>  ru   r?  r(   r  r  r`  r  rd  )r   r  r  r  xpu_op_overridess        rP   get_device_op_overridesr  2  s2    fc"0DL0"#F-@#F++rR   zdict[torch.dtype, torch.dtype]DTYPE_TO_COMPUTATION_DTYPEc                r   | t               v rt        j                  S | dv rd|v r|d   S |d   S | dv rt        j                  S | dv rt        j                  S | dk(  rd|v r|d   S |d   S | dk(  rd|v r|d   S |d   S | d	v r$|d   }t
        j                  j                  |      S | d
k(  rd|v r|d   S |d   S y)zK
    Given op name and a list of input dtypes, deduce the output dtype
    )to_dtype
index_exprr   )randrandn)	get_index	randint64	load_seed	reductionrD   constant)loadstorestore_reductionto_dtype_bitcastN)r$   r   r   floatint64r5   r   r   )op_namerm   kwargsbuf_names       rP   deduce_output_dtype_by_namer  S  s
    +-zz	  
 #*V"3vgAbA	  
 {{	  

 {{	K	")V"3vg@a@	J	")V"3vgAbA	  

 7ww  **	&	&")V"3vgAbArR   CSEVariableTypec                   t               }t        j                  j                  r'|dk(  r"| j	                  d| dt        |       d       y t        j                  j                  r|dk(  rddlm}m	} t        ||      sJ t        |             |t        j                  k(  r|j                  rd| d	}n.d
| d| d}n$d| d}|j                  rd| d}d| d||    d}| j	                  d| d       y y y )Nr[  ztl.static_assert(z
.dtype == r   rY  rD   )CppCSEVariableDTYPE_TO_CPPzIsVecMaskType<decltype(z	)>::valuezstd::is_same_v<decltype(z$), bool> || std::is_same_v<decltype(z), int>z	decltype(z	typename z::value_typezstd::is_same_v<r   >zstatic_assert(z);)r'   r   test_configsruntime_triton_dtype_assert	writeliner.   static_cpp_dtype_assert	cpp_utilsr  r  r>  r?  r   r   is_vec)r   varr   backendr  r  
is_same_dt
c_var_types           rP   check_dtyper  }  s    "#G667h;N,SEK<N;OqQR				4	4E9I;#~.9S	9.EJJzz6se9E
  8u<`ad`eelm
$SE+Jzz(LA
*:,be9L8MQOJ>*R89! :J	4rR   c                  `    e Zd Zd
dZddZddZddZddZddZe	dd       Z
e	dd       Zy	)DataTypePropagationc                    || _         d|j                  j                  i| _        |j                  j                         D ]  \  }}|j                  | j                  |<     y Nroot)body
root_blockr   graphs	subblocksitems)rd   r  kvs       rP   r   zDataTypePropagation.__init__  sU    	DOO))B
 NN((* 	%DAqWWDKKN	%rR   c                   |j                   }|D cg c]9  }t        |t        j                  j                        s(|j
                  dk7  s8|; }}t        |      dk(  ry t        d |D              }|sy t        j                  t        j                  |D cg c])  }|j                  t        j                     j                  + c}      S c c}w c c}w )Nplaceholderr   c              3     K   | ]K  }t         j                  |j                  v xr) |j                  t         j                     j                  d u M y wrh   )OptimizationContextkeymetar   ).0ns     rP   	<genexpr>zBDataTypePropagation.deduce_node_dtype_by_inputs.<locals>.<genexpr>  sS      )
   ##qvv- B*../55TAB)
s   AA)all_input_nodesr>  r   fxNodeoplenall	functoolsreducepromote_typesr  r  r  r   )rd   nodeinputsr  input_nodesall_input_nodes_propagateds         rP   deduce_node_dtype_by_inputsz/DataTypePropagation.deduce_node_dtype_by_inputs  s    %%
Auxx}}!=!$$-BWA
 
 {q %( )
 !)
 &
"
 *<GHqQVV'++,22H
 	

  Is   )CCC.C
c                b    | j                   |j                     }| j                  |      }|sJ |S rh   )r  targetpropagate_graph)rd   r  	sub_graphr   s       rP   deduce_node_dtype_by_subgraphz1DataTypePropagation.deduce_node_dtype_by_subgraph  s0    KK,	$$Y/urR   c                   |j                   dk(  ry |j                  dk(  rt        |j                        dk7  ry |j                  t        j
                  k(  rT|j                  d   }t        |t        j                  j                        sJ t        |             | j                  |      S t        |j                  t              sJ t        |j                               |j                  j                  d      r| j                  |      S t        |j                  g|j                  i |j                   x}	 |S | j#                  |      S )Nr  outputrD   r   masked_subblock)r  r  r  rm   operatorgetitemr>  r   r  r  r?  deduce_node_dtyperu   
startswithr  r  r  r  )rd   r  node_argoutput_dtypes       rP   r  z%DataTypePropagation.deduce_node_dtype  s   77m#;;("s499~':;;(***yy|Hh6FXF6))(33$++s+>T$++->>+;;!!"3455d;; 8 ++ L
   //55rR   c                n   |j                   sJ d }|j                   D ]  }t        j                  |j                  v r|j                  t        j                     }n
t               }| j	                  |      |_        ||j                  t        j                  <   |j                  dk(  s|j
                  } |S )Nr  )nodesr  r  r  r  r   r  )rd   r   graph_dtyper  opt_ctxs        rP   r  z#DataTypePropagation.propagate_graph  s    {{{-1 KK 		,D"&&$))3))$7$;$;<-/ 2248GM18DII)--.{{h&%mm		, rR   c                >    | j                  | j                  d         S r  )r  r  ri   s    rP   	propagatezDataTypePropagation.propagate  s    ##DKK$788rR   c                .     | |      j                         S rh   )r  )clsr  s     rP   propagate_loopbodyz&DataTypePropagation.propagate_loopbody  s    4y""$$rR   c                    ddl m} ddlm} t	        ||      sJ t        |             t	        |j                  |      sJ t        |j                               t        j                  |j                        S )Nr   r?   )rC   )		loop_bodyr@   	schedulerrC   r>  r?  _bodyr  r  )r  r  r@   rC   s       rP   propagate_scheduler_nodez,DataTypePropagation.propagate_scheduler_node  sX    (-$.:T
:.$**h/Adjj1AA/"55djjAArR   N)r  r@   rr   rs   )r  torch.fx.Noderr   r   )r  r  rr   r   )r   ztorch.fx.Graphrr   r   )rr   r   )r  r@   rr   r   )r  rC   rr   r   )rw   rx   ry   r   r  r  r  r  r  classmethodr  r  r}   rR   rP   r  r    sJ    %
*6:$9 % % B BrR   r  c                  6     e Zd Zddd	 	 	 	 	 	 	 d fdZ xZS )r   T)simplifypc                   |r]t        |t        j                        rCt        t        j
                  d      r)t        j
                  j                  j                  |      }t        | %  |      S )Nsizevars)
r>  r   Exprhasattrr5   r   r  r  superdoprint)rd   r   r  r  	__class__s       rP   r  zPythonPrinter.doprint  sK     
44*9U77##,,T2Dwt$$rR   )r   r   r  r   r  r   rr   ru   )rw   rx   ry   r  __classcell__r  s   @rP   r   r     s2    48D%%-1%=A%	% %rR   r   c                  T   e Zd ZdZedd       Zedd       Zedd       Zedd       Zedd       Z	edd       Z
edd       Zedd	       Zedd
       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zedd       Zy)OpDecompositionsz!
    Decomposes inductor ops
    c                    | S rh   r}   )rc   s    rP   identityzOpDecompositions.identity	  s	     rR   c                r    t        j                  t        j                  dt        j                        |       S NrD   )r0   truedivr  r   int32xs    rP   
reciprocalzOpDecompositions.reciprocal  s"    {{3<<5;;7;;rR   c                .    t        j                  | |       S rh   )r0   mulr  s    rP   squarezOpDecompositions.square  s    wwq!}rR   c                    t        j                  t        j                  dt        j                        t        j
                  |             S r   )r0   subr  r   float32erfr  s    rP   erfczOpDecompositions.erfc  s*    wws||Au}}5swwqzBBrR   c                    t        j                  t        j                  t        j                  |             t        j                  |             S rh   )r0   r  expr  r  r  s    rP   erfcxzOpDecompositions.erfcx  s,    wwswwszz!}-sxx{;;rR   c                    t        j                  t        j                  |       t        j                  dt        j
                              S r   )r0   r
  r  r  r   r  r  s    rP   expm1zOpDecompositions.expm1  s*    wwswwqz3<<5==#ABBrR   c           	         t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )NrD   
   r0   r  logr  mathr   r  r  s    rP   log10zOpDecompositions.log10"  s7    wwswwqz3<<DHHRL0@%--#PQQrR   c           	         t        j                  t        j                  |       t        j                  dt	        j                  d      z  t
        j                              S )NrD   r   r  r  s    rP   log2zOpDecompositions.log2&  s6    wwswwqz3<<DHHQK#OPPrR   c           
         t        j                  t        j                  | t        j                  t	        j
                  d      t        j                                    S )Nr   )r0   r  r  r  r  r  r   r  r  s    rP   exp2zOpDecompositions.exp2*  s3    wwswwq#,,txx{EMM"JKLLrR   c           	         t        j                  t        j                  | t        j                  dt        j
                                    S r   )r0   r  addr  r   r  r  s    rP   log1pzOpDecompositions.log1p.  s+    wwswwq#,,q%++">?@@rR   c                    t        j                  dt        j                        }t        j                  |t        j
                  |t        j                  t        j                  |                         S r   )r0   r  r   r  r  r  r  neg)r  ones     rP   sigmoidzOpDecompositions.sigmoid2  sC    ll1ekk*{{3SWWSWWQZ-@ ABBrR   c                r    t        j                  | t        j                  dt        j                              S Nr   )r0   r   r  r   r  r  s    rP   reluzOpDecompositions.relu7  s"    {{1cll1ekk:;;rR   c                V    t        j                  t        j                  | |      |      S rh   )r0   r  r  r  yzs      rP   fmazOpDecompositions.fma;  s     wwswwq!}a((rR   c                T    t        j                  t        j                  |       |      S rh   )r0   r  floorr   r   s     rP   floor_to_intzOpDecompositions.floor_to_int@      ||CIIaL%00rR   c                T    t        j                  t        j                  |       |      S rh   )r0   r  ceilr.  s     rP   ceil_to_intzOpDecompositions.ceil_to_intD  s    ||CHHQK//rR   c                T    t        j                  t        j                  |       |      S rh   )r0   r  truncr.  s     rP   trunc_to_intzOpDecompositions.trunc_to_intH  r0  rR   c           	        t        j                  | |      }t        j                  t        j                  |t        j                  dt
        j                              t        j                  t        j                  |      t        j                  |                  }t        j                  |t        j                  ||      |      S r%  )
r0   modand_ner  r   r  signbitwherer  )r   r   rconds       rP   	remainderzOpDecompositions.remainderL  sy    GGAqMxxFF1cll1ekk23FF3;;q>3;;q>2
 yyswwq!}a00rR   c                T    t        j                  t        j                  |       |      S rh   )r0   r  roundr.  s     rP   round_to_intzOpDecompositions.round_to_intU  r0  rR   N)rc   OpVarTrr   rC  r  rC  rr   rC  )r  rC  r)  rC  r*  rC  rr   rC  )r   rC  r   r   rr   rC  r   rC  r   rC  rr   rC  )rw   rx   ry   rz   r   r  r  r  r  r  r  r  r  r  r  r#  r&  r+  r/  r3  r6  r?  rB  r}   rR   rP   r  r    s}      < <   C C < < C C R R Q Q M M A A C C < < ) ) 1 1 0 0 1 1 1 1 1 1rR   r  z[a-z0-9_.]+|\([^)]*\)|)flagsc                    | d   dk7  st        |       dk  ryd}t        | dd        D ]3  \  }}|dk(  r|dz  }n
|dk(  r|dz  }|dk(  s!|t        |       dz
  k7  s3 y |dk(  sJ y)Nr   (r   FrD   r   T)r  	enumerate)stringr   ichars       rP   _all_in_parensrM  ]  s    ayC3v;?EVABZ( 43;QJES[QJEA:!s6{Q. A::rR   c                  H   e Zd Zed d       Zed!d       Zed"d       Zed#d       Zed$d       Zed$d       Z	ed$d       Z
ed$d       Zed$d	       Zed%d
       Zed&d       Z	 	 d'	 	 	 	 	 	 	 	 	 d(dZ	 	 	 	 	 	 	 	 	 	 d)dZd*dZ	 d+	 	 	 	 	 	 	 	 	 d,dZd-dZ	 	 	 	 	 	 	 	 	 	 d.dZ	 	 	 	 	 	 	 	 d/dZ	 	 	 	 	 	 	 	 	 	 d0dZ	 	 d1	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2dZd3dZdej4                  ddd	 	 	 	 	 	 	 	 	 	 	 	 	 d4dZd5dZd6dZed7d       Zed8d       Z ed9d       Z!y):OpOverridesc                r    t        | t              s t        j                  |       st	        |       r| S d|  dS NrH  r   )r>  CSEVariable_RE_PAREN_NOT_NEEDED	fullmatchrM  )rJ  s    rP   parenzOpOverrides.parenm  s9     v{+#--f5f% M6(!}rR   c                    t        |       S rh   )repr)rc   r   s     rP   r  zOpOverrides.constantx  s    E{rR   c                2    dt         j                  |        S )N~rO  rU  r  s    rP   bitwise_notzOpOverrides.bitwise_not|  s    ;$$Q'())rR   c                2    t         j                  |        dS )Nz == 0rZ  )r   s    rP   logical_notzOpOverrides.logical_not  s    ##A&'u--rR   c                \    t         j                  |        dt         j                  |       S )Nz & rZ  r  r)  s     rP   bitwise_andzOpOverrides.bitwise_and  +    ##A&'s;+<+<Q+?*@AArR   c                \    t         j                  |        dt         j                  |       S )Nz | rZ  r_  s     rP   
bitwise_orzOpOverrides.bitwise_or  ra  rR   c                \    t         j                  |        dt         j                  |       S )Nz ^ rZ  r_  s     rP   bitwise_xorzOpOverrides.bitwise_xor  ra  rR   c                \    t         j                  |        dt         j                  |       S )Nz << rZ  r_  s     rP   bitwise_left_shiftzOpOverrides.bitwise_left_shift  +    ##A&'tK,=,=a,@+ABBrR   c                \    t         j                  |        dt         j                  |       S )Nz >> rZ  r_  s     rP   bitwise_right_shiftzOpOverrides.bitwise_right_shift  rh  rR   c                .    t        j                  | |      S rh   )r0   r  r   s     rP   int_truedivzOpOverrides.int_truediv  s    
 {{1a  rR   c                T    t        j                  | t        j                  |            S rh   )r0   r  r   Integer)ra   r   s     rP   r  zOpOverrides.load_seed  s    xxemmF344rR   Tc                *    t        t        |            S rh   )r,   ru   )rd   r  r   checkwrap_negs        rP   indirect_indexingzOpOverrides.indirect_indexing  s     "#c(++rR   c                D    t        t        |       j                   d      )Nz,: check_bounds should be handled by CSEProxyr   r?  rw   rd   r   r   loweruppers        rP   check_boundszOpOverrides.check_bounds  s'     "Dz""##OP
 	
rR   c                D    t        t        |       j                   d      )Nz$: load should be handled by CSEProxyrt  rd   ra   rz  s      rP   r  zOpOverrides.load  s%    !Dz""##GH
 	
rR   Nc                D    t        t        |       j                   d      )Nz%: store should be handled by CSEProxyrt  rd   ra   rz  rc   rX   s        rP   r  zOpOverrides.store  s'     "Dz""##HI
 	
rR   c                D    t        t        |       j                   d      )Nz/: store_reduction should be handled by CSEProxyrt  rd   ra   rz  rc   s       rP   r  zOpOverrides.store_reduction  s%    !Dz""##RS
 	
rR   c                D    t        t        |       j                   d      )Nz): reduction should be handled by CSEProxyrt  rd   r   	src_dtypereduction_typerc   s        rP   r  zOpOverrides.reduction  s'     "Dz""##LM
 	
rR   c                D    t        t        |       j                   d      )Nz$: scan should be handled by CSEProxyrt  rd   dtypes
combine_fnvaluess       rP   scanzOpOverrides.scan  s'     "Dz""##GH
 	
rR   c                D    t        t        |       j                   d      )Nz$: sort should be handled by CSEProxyrt  rd   r  r  stable
descendings        rP   sortzOpOverrides.sort  s'     "Dz""##GH
 	
rR   c                D    t        t        |       j                   d      )Nz): bucketize should be handled by CSEProxyrt  rd   r  
boundariesboundary_indicesindexing_dtyperightsortersorter_indicess           rP   	bucketizezOpOverrides.bucketize  s'     "Dz""##LM
 	
rR   c                D    t        t        |       j                   d      )Nz2: halide_clamp only implemented for Halide backendrt  )rd   rc   r   rp  s       rP   halide_clampzOpOverrides.halide_clamp  s%    !Dz""##UV
 	
rR   rD   )constraintsr   is_purepackc               D    t        t        |       j                   d      )Nz<: inline_asm_elementwise only implemented for Triton backendrt  )rd   asmr  r   r  r  r  s          rP   inline_asm_elementwisez"OpOverrides.inline_asm_elementwise  s'     "Dz""##_`
 	
rR   c                D    t        t        |       j                   d      )Nz.: ops.output should not appear at codegen timeAssertionErrorr?  rw   rl   s     rP   r  zOpOverrides.output  s%    Dz""##QR
 	
rR   c                D    t        t        |       j                   d      )Nz3: ops.placeholder should not appear at codegen timer  rd   rz  s     rP   r  zOpOverrides.placeholder  s%    Dz""##VW
 	
rR   c                0     d fd} |_         d|_        |S )Nc                J    t        t        |       j                   d       )Nz does not implement ops.rt  )rd   rm   r  ra   s      rP   unimplementedz1OpOverrides._unimplemented.<locals>.unimplemented  s*    %:&&''?vF rR   T)rd   rO  rm   r	   r  r	   rr   rC  )rw   is_unimplemented)ra   r  s   ` rP   _unimplementedzOpOverrides._unimplemented  s     	
 "&)-&rR   c                p    t        | |d       }t        t        |d       }| xs ||k(  xs t        |dd      S )Nr  F)getattrr1   )r  ra   fn
default_fns       rP   _is_unimplementedzOpOverrides._is_unimplemented  s?    S$%Zt4
vSz)SWR9KU-SSrR   c                P   |dv sJ |       t         j                         D ]  \  }}t        ||      }|/| j                  |      s&t	        | || j                  |             C|| j                  vsJ d| d| j                          ||_        t	        | |t        |              y )N)r[  rY  cppvecrZ  re  zmultiple definitions of z on )	pointwise_overrides_datar  r  r  setattrr  __dict__rw   r   )r  r  funcnamedataimpls        rP   _initialize_pointwise_overridesz+OpOverrides._initialize_pointwise_overrides"  s    EEMvME6<<> 
	;NHd4(D|((2C3+=+=h+GHs||3 .xjS\\NK3 !)X|D'9:
	;rR   )rJ  rC  rr   rC  )rc   zUnion[bool, float, int]r   r   rr   rC  rD  )r   rC  rr   rC  )r  rC  r)  rC  rr   rC  rE  )ra   ru   r   rC  rr   rC  TT)
r  rC  r   Union[sympy.Expr, int]rp  r   rq  r   rr   sympy.Symbol
r   r   r   r   rv  r   rw  r   rr   rs   )ra   ru   rz  r   rr   rC  rh   )
ra   ru   rz  r   rc   rC  rX   r4   rr   rs   )ra   ru   rz  r   rc   rC  rr   rs   )
r   r   r  r   r  r3   rc   !Union[OpVarT, tuple[OpVarT, ...]]rr   r  )r  tuple[torch.dtype, ...]r  zFCallable[[tuple[OpVarT, ...], tuple[OpVarT, ...]], tuple[OpVarT, ...]]r  tuple[OpVarT, ...]rr   r  )
r  r  r  r  r  r   r  r   rr   r  NN)r  rC  r  .tuple[str, sympy.Expr, sympy.Expr, sympy.Expr]r  rC  r  r   r  r   r   Optional[tuple[str, sympy.Expr]]r  zOptional[OpVarT]rr   rC  )rc   rC  r   r   rp  r   rr   rC  )r  rC  r  ru   r  r   r   r   r  r   r  r   rr   rC  )rm   rC  rr   rs   )rz  r   rr   rC  )ra   ru   rr   zCallable[..., OpVarT]ra   ru   rr   r   )r  ru   rr   rs   )"rw   rx   ry   r   rU  r  r[  r]  r`  rc  re  rg  rj  rl  r  rr  rx  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r}   rR   rP   rO  rO  l  s'       * * . . B B B B B B C C C C ! ! 5 5 ,, %, 	,
 , 
,

&0
9=
FJ
	

 NR

 *
39
AJ
	


	
	
 	
 &		

 1	
 
+	

'


 #
 

	
'	
 #	
 		

 	
 
	
$ 48+/

 C
 !	

 $
 
 1
 )
 


 &*"]]

 
 #	

 
 
 
 





   T T
 ; ;rR   rO  c                  |    e Zd ZU ded<   ded<   dZded<   dZded<   ej                  Zd	ed
<   dZ	ded<   dZ
ded<   y)OverridesDataru   ra   r   rY  NzOptional[Callable[..., str]]r[  r  r   type_promotion_kindrZ  re  )rw   rx   ry   r{   r[  r  r   DEFAULTr  rZ  re  r}   rR   rP   r  r  3  sQ    
I	+/F(/+/F(/'// 8  ,0F(/(,C	%,rR   r  airy_aic                    d|  dS )Nzairy_ai_forward(r   r}   r  s    rP   r_  r_  G  s    (1- rR   special_airy_ai)r  rY  ra   	bessel_j0c                    d|  dS )Nzbessel_j0_forward(r   r}   r  s    rP   r_  r_  L      *1#Q/ rR   c                    d|  dS )Nzlibdevice.j0(r   r}   r  s    rP   r_  r_  M      =1- rR   special_bessel_j0)r  rY  r[  ra   	bessel_j1c                    d|  dS )Nzbessel_j1_forward(r   r}   r  s    rP   r_  r_  R  r  rR   c                    d|  dS )Nzlibdevice.j1(r   r}   r  s    rP   r_  r_  S  r  rR   special_bessel_j1	bessel_y0c                    d|  dS )Nzbessel_y0_forward(r   r}   r  s    rP   r_  r_  X  r  rR   c                    d|  dS )Nzlibdevice.y0(r   r}   r  s    rP   r_  r_  Y  r  rR   special_bessel_y0	bessel_y1c                    d|  dS )Nzbessel_y1_forward(r   r}   r  s    rP   r_  r_  ^  r  rR   c                    d|  dS )Nzlibdevice.y1(r   r}   r  s    rP   r_  r_  _  r  rR   special_bessel_y1digammac                    d|  dS )Nzcalc_digamma(r   r}   r  s    rP   r_  r_  d  s    aS* rR   c                    |  dS )Nz
.digamma()r}   r  s    rP   r_  r_  e  s    A3j) rR   )r  rY  r  ra   r  c                    d|  dS )Nzcalc_erfcx(r   r}   r  s    rP   r_  r_  l      A3a( rR   c                    d|  dS )Nzlibdevice.erfcx(r   r}   r  s    rP   r_  r_  m  s    +A3a0 rR   special_erfcxr+  c                    d|  d| d| dS )Nz	std::fma(r   r   r}   r(  s      rP   r_  r_  r  s    is"QCr!A6 rR   c                    d|  d| d| dS )Nzfmadd(r   r   r}   r(  s      rP   r_  r_  s  s    s"QCr!A6 rR   c                    d|  d| d| dS )Nzlibdevice.fma(r   r   r}   r(  s      rP   r_  r_  t  s    s"QCr!A> rR   )r  rY  r  r[  ra   igammac                    d|  d| dS Nzcalc_igamma(r   r   r}   r_  s     rP   r_  r_  z      <s"QCq1 rR   igammacc                    d|  d| dS Nzcalc_igammac(r   r   r}   r_  s     rP   r_  r_        =2aS2 rR   gammaincc                    d|  d| dS r  r}   r_  s     rP   r_  r_    r  rR   special_gammainc	gammainccc                    d|  d| dS r  r}   r_  s     rP   r_  r_    r  rR   special_gammaincci0c                    d|  dS )Nzcalc_i0(r   r}   r  s    rP   r_  r_        1o rR   c                    d|  dS Nzlibdevice.cyl_bessel_i0(r   r}   r  s    rP   r_  r_        3A3a8 rR   c                    |  dS )Nz.i0()r}   r  s    rP   r_  r_    s    A3e rR   )r  rY  r[  r  ra   i0ec                    d|  dS )Nz	calc_i0e(r   r}   r  s    rP   r_  r_        	!A& rR   c                    |  dS )Nz.i0e()r}   r  s    rP   r_  r_    s    A3f rR   special_i0ei1c                    d|  dS )Nzcalc_i1(r   r}   r  s    rP   r_  r_    r  rR   c                    d|  dS Nzlibdevice.cyl_bessel_i1(r   r}   r  s    rP   r_  r_    r  rR   
special_i1i1ec                    d|  dS )Nz	calc_i1e(r   r}   r  s    rP   r_  r_    r  rR   special_i1elog_ndtrc                    d|  dS )Nzcalc_log_ndtr(r   r}   r  s    rP   r_  r_    s    qc+ rR   special_log_ndtrmodified_bessel_i0c                    d|  dS )Nzmodified_bessel_i0_forward(r   r}   r  s    rP   r_  r_        3A3a8 rR   c                    d|  dS r  r}   r  s    rP   r_  r_    r  rR   special_modified_bessel_i0modified_bessel_i1c                    d|  dS )Nzmodified_bessel_i1_forward(r   r}   r  s    rP   r_  r_    r  rR   c                    d|  dS r  r}   r  s    rP   r_  r_    r  rR   special_modified_bessel_i1modified_bessel_k0c                    d|  dS )Nzmodified_bessel_k0_forward(r   r}   r  s    rP   r_  r_    r  rR   special_modified_bessel_k0modified_bessel_k1c                    d|  dS )Nzmodified_bessel_k1_forward(r   r}   r  s    rP   r_  r_    r  rR   special_modified_bessel_k1ndtrc                    d|  dS )Nz
calc_ndtr(r   r}   r  s    rP   r_  r_    s    
1#Q' rR   special_ndtrndtric                    d|  dS )Nzcalc_ndtri(r   r}   r  s    rP   r_  r_    r  rR   special_ndtri	polygammac                *    |  d| d|  d| d| d|  dS )Nz == 0 ? calc_digamma(z) : (z == 1 ? trigamma(z) : calc_polygamma(r   z))r}   r_  s     rP   r_  r_    s8    S%aSaS0A!DWXYWZZ\]^\__ab rR   scaled_modified_bessel_k0c                    d|  dS )Nz"scaled_modified_bessel_k0_forward(r   r}   r  s    rP   r_  r_        :1#Q? rR   !special_scaled_modified_bessel_k0scaled_modified_bessel_k1c                    d|  dS )Nz"scaled_modified_bessel_k1_forward(r   r}   r  s    rP   r_  r_    r#  rR   !special_scaled_modified_bessel_k1spherical_bessel_j0c                    d|  dS )Nzspherical_bessel_j0_forward(r   r}   r  s    rP   r_  r_    s    4QCq9 rR   special_spherical_bessel_j0zetac                    d|  d| dS )Nzzeta(r   r   r}   r_  s     rP   r_  r_    s    52aS* rR   special_zetachebyshev_polynomial_tc                    d|  d| dS )Nzchebyshev_polynomial_t_forward(r   r   r}   r_  s     rP   r_  r_        :1#Rs!D rR   special_chebyshev_polynomial_tchebyshev_polynomial_uc                    d|  d| dS )Nzchebyshev_polynomial_u_forward(r   r   r}   r_  s     rP   r_  r_    r0  rR   special_chebyshev_polynomial_uchebyshev_polynomial_vc                    d|  d| dS )Nzchebyshev_polynomial_v_forward(r   r   r}   r_  s     rP   r_  r_    r0  rR   special_chebyshev_polynomial_vchebyshev_polynomial_wc                    d|  d| dS )Nzchebyshev_polynomial_w_forward(r   r   r}   r_  s     rP   r_  r_    r0  rR   special_chebyshev_polynomial_wlegendre_polynomial_pc                    d|  d| dS )Nzlegendre_polynomial_p_forward(r   r   r}   r_  s     rP   r_  r_        9!BqcC rR   special_legendre_polynomial_pshifted_chebyshev_polynomial_tc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_t_forward(r   r   r}   r_  s     rP   r_  r_        B1#Rs!L rR   &special_shifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_uc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_u_forward(r   r   r}   r_  s     rP   r_  r_    rA  rR   &special_shifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_v_forward(r   r   r}   r_  s     rP   r_  r_    rA  rR   &special_shifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_wc                    d|  d| dS )Nz'shifted_chebyshev_polynomial_w_forward(r   r   r}   r_  s     rP   r_  r_    rA  rR   &special_shifted_chebyshev_polynomial_whermite_polynomial_hc                    d|  d| dS )Nzhermite_polynomial_h_forward(r   r   r}   r_  s     rP   r_  r_    s    82aSB rR   special_hermite_polynomial_hhermite_polynomial_hec                    d|  d| dS )Nzhermite_polynomial_he_forward(r   r   r}   r_  s     rP   r_  r_    r=  rR   special_hermite_polynomial_helaguerre_polynomial_lc                    d|  d| dS )Nzlaguerre_polynomial_l_forward(r   r   r}   r_  s     rP   r_  r_  !  r=  rR   special_laguerre_polynomial_lzdict[str, OverridesData]r  c                     t         fdt        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j
                  fD              S )Nc              3  &   K   | ]  }|v  
 y wrh   r}   )r  r  ra   s     rP   r  z$is_buffer_removed.<locals>.<genexpr>(  s       		   )anyr5   r   removed_bufferskernelinplaced_to_removera   s   `rP   is_buffer_removedr]  '  sU      GG##HH$$GG&&HH''	
  rR   c                  4     e Zd ZdZd fdZddZddZ xZS )DeferredLinezHA line that can be 'unwritten' by adding name to V.graph.removed_buffersc                V    t         |   |       || _        t        |t              rJ y rh   )r  r   ra   r>  r%   )rd   ra   liner  s      rP   r   zDeferredLine.__init__6  s+    	d$45555rR   c                F    t        | j                        s| j                  S y rh   )r]  ra   ra  ri   s    rP   __call__zDeferredLine.__call__;  s     +99rR   c                .    t        | j                  |      S rh   )r_  ra   )rd   ra  s     rP   	_new_linezDeferredLine._new_line@  s    DIIt,,rR   )ra   ru   ra  ru   r   )ra  ru   rr   r_  )rw   rx   ry   rz   r   rc  re  r  r  s   @rP   r_  r_  3  s    R6

-rR   r_  c                      e Zd ZdddZy)BracesBufferc                H     t         j                  d fd       } |       S )Nc               3    K   t              D ](  } j                  d       xj                  dz  c_        * t               D ](  } xj                  dz  c_        j                  d       * d  t               D ](  } j                  d       xj                  dz  c_        * t              D ](  } xj                  dz  c_        j                  d       * y w)N{rD   })ranger  _indent)_r   rd   s    rP   ctxz BracesBuffer.indent.<locals>.ctxF  s     6] "s#!" F7^ $!s#$ F7^ "s#!" 6] $!s#$s   C C#)rr   Iterator[None])
contextlibcontextmanager)rd   r   ro  s   `` rP   indentzBracesBuffer.indentE  s$    		"	"	$ 
#	$ urR   N)rD   )r   r   rr   z'contextlib.AbstractContextManager[None])rw   rx   ry   rs  r}   rR   rP   rg  rg  D  s    rR   rg  c                  "    e Zd ZU ded<   ded<   y)InplacedBufferru   r   r   other_namesNr   r}   rR   rP   ru  ru  Y  s    OrR   ru  c                  .    e Zd ZU ded<   dZded<   ddZy)	ArgNameru   ra   Fr   is_constexprc                B    | j                    | j                  rd S d S )Nz : tl.constexprr  )ra   ry  ri   s    rP   	full_namezArgName.full_named  s*    ))$2C2C.LMMLMMrR   Nrt   )rw   rx   ry   r{   ry  r{  r}   rR   rP   rx  rx  ^  s    
IL$NrR   rx  c                      e Zd ZddZy)
RemovedArgc                     y)NREMOVEDr}   ri   s    rP   __str__zRemovedArg.__str__i  s    rR   Nrt   )rw   rx   ry   r  r}   rR   rP   r}  r}  h  s    rR   r}  c                      e Zd Ze	 	 	 	 	 	 	 	 dd       ZddZddZedd       ZddZddZ	ddZ
ddZdd	Zdd
ZddZd dZd!dZd"dZd#dZ	 d$	 	 	 d%dZ	 	 d&dZd'dZd(dZd)dZy)*
KernelArgsc                ~    |j                  |t              }t        |t              r|  t	        |       x||<   }|S |S rh   )getr  r>  r}  r  )r   odictra   result
new_results        rP   _lookupzKernelArgs._lookupq  sD     */4)Afj)*0#e*'>>E$K*rR   c                J    i | _         i | _        i | _        i | _        g | _        y rh   )input_buffersoutput_buffersinplace_buffersr  workspace_argsri   s    rP   r   zKernelArgs.__init__}  s)    -/ACMO/124rR   c                    dj                  dj                  t        t        | j                  | j
                  | j                  | j                  g                  S )NzKernelArgs({})r   )formatr   maprW  r  r  r  r  ri   s    rP   __repr__zKernelArgs.__repr__  sS    &&II**++,,	

 	
rR   c                "    t        | t              S rh   )r>  r}  r\  s    rP   _buffer_is_marked_removedz$KernelArgs._buffer_is_marked_removed  s     $
++rR   c                :   t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v rt        t        | j                  |         S || j                  v r't        t        | j                  |         j                  S |j                  d      r| j                  d| j                  |      S | j                  d| j                  |      S )Nseedin_ptr)r5   r   r  mutation_real_namer  rY  r  r   ru   r  ru  r   r  r  r  r   s     rP   inputzKernelArgs.input  s    7777$$77;;D$GD1772228D824&&&T006774'''(<(<T(BCNNN??6"<<(:(:DAA||Hd&8&8$??rR   c                   t         j                  j                  r4t         j                  j                  j                  j	                  ||      }|t         j                  j
                  vsJ |       || j                  v r't        t        | j                  |         j                  S | j                  d| j                  |      S )Nout_ptr)r5   r   r  r  r  rY  r  r   ru  r   r  r  r   s     rP   r  zKernelArgs.output  s    7777$$77;;D$GD1772228D824'''(<(<T(BCNNN||It':':DAArR   c                   |t         j                  j                  v r)t         j                  j                  j                  |       || j                  vsJ |       || j                  v rL| j                  |   }t        |t              rJ |j                  j                  |       || j                  |<   y | j                  j                         D cg c]  }t        |t              s| }}| j                  j                         D cg c]  }t        |t              r| }}t        t        |            t        |      z   }t        d| ||g      }|| j                  |<   || j                  |<   y c c}w c c}w )N
in_out_ptr)r5   r   unaligned_buffersr  r  r>  r}  rv  appendr  r  r/   ru  )rd   
input_nameoutput_namebufvalalive_buffersrY  inplace_buffer_idxs           rP   make_inplacezKernelArgs.make_inplace  sk   222GG%%))+6$"6"66CC6---&&z2C!#z222OO"";/03D  -  //668!#z2 M   //668c:. O 
 "%VM%:!;c/>R!R /01[)C 03D  ,03D  -!
s   E3E8c                J   t        |t        j                  |      t        j                  j                         t         j                               }t        | j                        D ]  \  }}t         j                  ||      r?|j                  }t         j                  ||      | j                  |<   |j                  |fc S |j                  |j                  k7  r|j                  |j                  k7  rJ |        | j                  j                  |       |j                  dfS )a  
        Allocate or extend a workspace buffer of nbytes bytes.

        This function manages the allocation of a workspace buffer. It either creates
        a new WorkspaceArg or extends an existing one.

        Note:
        - Calling this function will in-place mutate the args by adding or updating
        a WorkspaceArg.
        - The codegen for generating the Python argdefs and call_defs will check
        this field and allocate the buffer accordingly.
        - A new argument "ws_ptr" will be present in the generated code.

        Args:
            nbytes (sympy.Expr): The number of bytes to allocate.
            zero_fill (bool): Whether to initialize the buffer to zero.

        Returns:
            Tuple[str, int]: A tuple containing:
                - "ws_ptr": A string identifier for the workspace pointer.
                - offset: An integer representing the byte offset in the workspace.
        )r   r   r   r   r   )r   r   r   r5   r   get_current_device_or_throwr   rI  r  r   r   r   r   r   r  )rd   nbytesr   argrK  existing_argr   s          rP   r%  zKernelArgs.workspace  s   . '11)<77668#//1	
  ))<)<= 	OA|$$\37%++)5):):<)M##A&#..66''3>>9 ++s~~= >	 	""3'~~q  rR   c           
        t         j                  j                         }t        |t        j
                  t        j                  dd|j                   d|j                   |      }| j                  D ]*  }|j                  |j                  k(  s||k(  r#J ||f        | j                  j                  |       |j                  S )a  
        Lazily allocate a graph-wide semaphores buffer with at least min_size.  This is a single buffer shared by
        all kernels and zero initialized once at graph start.  Each kernel must leave the buffer zeroed on exit.

        Warning: multiple calls to this function will return the same buffer.

        Args:
            min_size: the number of int32 semaphores required

        Returns:
            name of the semaphores buffer
        sem_ptrsemaphores_rn  )r   r   r   r   r   r   )r5   r   r  r   r   r   r   uint32r?  rz  r  r   r  )rd   min_sizecurrent_devicer  r  s        rP   
semaphoreszKernelArgs.semaphores  s     <<>'66,, $^%8%8$9>;O;O:PQ!
 !// 	@L&&#..8l*?S,,??*	@ 	""3'~~rR   c                f   t        |t              sJ t        |      |f       t        j                  |      }|| j
                  v r| j
                  |   S | j
                  j                         v r0 t        fd| j
                  j                         D               | j
                  |<   S )Nc              3  F   K   | ]  }|j                        sd   yw)rD   N)r  )r  r  ra   s     rP   r  z)KernelArgs.seed_offset.<locals>.<genexpr>  s     U1!,,tBTQUs   !!)r>  r   r?  r   rn  r  r  sum)rd   ra   rc   s    ` rP   seed_offsetzKernelArgs.seed_offset  s    %%;UU';;%e$DMM!==''4==''))&U(<(<(>UUVW   $erR   c                    t        |t        j                        sJ t        |      |f       |j                  dk(  rd| j
                  |<   y| j                  d| j
                  |      S )Nr  ks)r>  r   Symbolr?  ra   r  r  r   s     rP   r   zKernelArgs.size  sX    $-AT
D/AA-99"(DMM$||D$--66rR   c                    t        | j                  j                         | j                  j                         | j                  j                               S rh   )r   r  keysr  r  ri   s    rP   
call_nameszKernelArgs.call_names!  sA    ##%t':':'?'?'A4==CUCUCW
 	
rR   c                   | j                   j                  |d      }|t        |t              s|j                  S | j
                  j                  |d      }|t        |t              s|S | j                  j                  |d      S )z;
        Returns inner name of a given outer name.
        N)r  r  r>  r}  r   r  r  )rd   ra   inplacedr  s       rP   arg_namezKernelArgs.arg_name&  s}     ''++D$7
8Z(H&&&))--dD9":k:+N!!%%dD11rR   c                    |S rh   r}   )rd   r  r   s      rP   wrap_ptr_argzKernelArgs.wrap_ptr_arg2  s    
rR   c                    t        |      S rh   )ru   )rd   r   s     rP   wrap_size_argzKernelArgs.wrap_size_arg5  s    4yrR   Nc                   ddl m} |ddl m} |}g }g }g }t        | j                  j                               D ]  }t        |t              r|j                  d   }|j                  }	t        j                  j                  |      }
||
   }|j                  | d|	        |j                  | j                  ||
             |j                  | d        | j                  j!                         D ]  \  }}	|| j                  v rt        j                  j                  |      }
||
   }|j                  d| d|	        |j                  | j                  ||
             |j                  d| d        | j"                  j!                         D ]  \  }}|| j                  v st        |t              r%t        j                  j                  |      }
||
   }|j                  | d|        |j                  | j                  ||
             |j                  | d        | j$                  j!                         D ]  \  }}	|j                  d| d|	        |j                  | j'                  |             |j                  d|        t        j                  j(                  slt        j                  j(                  j+                  |        | j,                  rJ d	       |||fS )
NrD   )
INDEX_TYPE)r  r  z* *zconst  zWorkspace not supported on CPU )r  r  r  r/   r  r  r>  r}  rv  r   r5   r   r   r  r  r  r  r  r  r  wrapper_codeensure_size_computedr  )rd   dtype_to_cpp_typer  r  	call_argsarg_defs	arg_typesr  outerinnerr   	cpp_dtypemaybe_inners                rP   cpp_argdefszKernelArgs.cpp_argdefs8  s    	*$/ ,		t33::<= 		.H(J/((,E''EGG%%e,E)%0IOOykE734T..ue<=	{!_-		. !..446 	4LE5,,,GG%%e,E)%0IOOfYKr%9:T..ue<=vi[23	4 #'"5"5";";"= 	.E;,,,
;
0SGG%%e,E)%0IOOykK=9:T..ue<=	{!_-	. !MM//1 	ALE5OOfZL%9:T//67vj\23ww##$$99%@	A &&I(II&I--rR   c                   g }g }g }g }t        | j                  j                               D ]  }t        |t              r|j                  t        |j                               |j                  |j                  d          |j                  t        j                  j                  |j                  d                |j                  t        |j                  |j                  d   t        j                  j                  |j                  d                       t        | j                  j                         | j                   j                               D ]  \  }}|| j                  v st        |t              r%|j                  t        |             |j                  |       |j                  t        j                  j                  |             |j                  t        ||t        j                  j                  |                    | j"                  j                         D ]  \  }}|j                  t        |             |j                  |       |j                  t%        |             |j                  t'        ||             t        j                  j(                  st        j                  j(                  j+                  |        | j,                  D ]m  }|j                  t        |j                               |j                  |j.                         |j                  |       |j                  |j0                         o ||||fS )Nr  )ra   r   r   )r/   r  r  r>  r}  r  rx  r   rv  r5   r   r   r   r   r  r  r  r  r?  r   r  r  r  r   r   )	rd   r  r  r  precompile_argsr  r  r  r  s	            rP   python_argdefszKernelArgs.python_argdefsh  s    #%!	!	/1t33::<= 	H(J/OOGH$7$789X11"56QWW..x/C/CB/GHI""!,,#//3''++H,@,@,DE	 "$$&(;(;(A(A(C
 	LE5 ,,,
5*0MOOGEN+U#QWW..u56"" ''++E2	 !MM//1 	ALE5OOGEN+U#T%[)""75%#89ww##$$99%@	A && 	(COOGCNN34S^^,""3'SYY'		(
 OY>>rR   c              #    K   t        | j                  j                               D ]  }t        |t              r|j
                  D ]  }|t        j                  j                  v s|t        j                  j                  v r<|| j                  v r| j                  |   |j                  f || j                  v svt        t        | j                  |         |j                  f   y wrh   )r/   r  r  r>  r}  rv  r5   r   r[  rZ  r  r   r  r   ru   )rd   r  others      rP   aliaseszKernelArgs.aliases  s     t33::<= 	UH(J/!-- 	UQWW777 ; ;;D...,,U3X5H5HHHD///sD$7$7$>?ATATTT	U	Us   B9C,<0C,c                    t        | j                  j                  |t              t              xr. t        | j
                  j                  |t              t              S rh   )r>  r  r  r  r}  r  r   s     rP   
is_removedzKernelArgs.is_removed  sK    ##D'2J
 N--11$@*M	NrR   c                l   t               }t        | j                  j                               D ]1  }t	        |t
              r|j                  |j                  d          3 | j                  j                         D ]5  \  }}|| j                  v st	        |t
              r%|j                  |       7 |S )Nr  )
r   r/   r  r  r>  r}  r  rv  r  r  )rd   	live_outsr  r  r  s        rP   live_output_bufferszKernelArgs.live_output_buffers  s    %/\	t33::<= 	4H(J/MM(..r23	4 !//557 	!LE5,,,
5*0MMM% 	! rR   )r   ru   r  z6Union[dict[_T, Union[str, RemovedArg]], dict[_T, str]]ra   rG   rr   ru   rq   rt   )ra   r	   rr   r   r'  )r  ru   r  ru   rr   rs   )r  r   r   r   rr   ztuple[str, int])r  r   rr   ru   )ra   ru   rc   r   rr   ru   )ra   r  rr   ru   )rr   zIterator[str])ra   ru   rr   r   )r  ru   r   r   rr   ru   )r   
SymbolLikerr   ru   rh   )r  z Optional[dict[torch.dtype, str]]rr   z&tuple[list[str], list[str], list[str]])rr   z?tuple[list[ArgName], list[str], list[KernelArgType], list[Any]])rr   zIterator[tuple[str, str]]r  )rr   zOrderedSet[str])rw   rx   ry   r   r  r   r  r  r  r  r  r%  r  r  r   r  r  r  r  r  r  r  r  r  r}   rR   rP   r  r  p  s    		E	 	 
		 	5
 , ,
@B48'!R87


2 EI..!A..	/..`/?	H/?bUN
rR   r  c                  Z     e Zd ZdZ	 d	 	 	 	 	 d	 fdZd
dZddZddZddZd
dZ	 xZ
S )rR  aD  A CSEVariable is just a name for an expression but it is useful to be able to annotate them on a backend dependent basis.
    To do so, the backends can simply overload `Kernel.create_cse_var`
    The "CSEVariable.update_on_args" method gives you a hook for annotations
    See example of TritonCSEVariable in triton.py
    c                    t         |           t        |t              sJ t	        |             || _        || _        d| _        || _        y r   )	r  r   r>  r   r?  ra   bounds	use_countr   )rd   ra   r  r   r  s       rP   r   zCSEVariable.__init__  sE     	&+.<V<.	
rR   c                    | j                   S rh   r\  ri   s    rP   r  zCSEVariable.__str__  s    yyrR   c                ,    t        | j                        S rh   )hashra   ri   s    rP   __hash__zCSEVariable.__hash__  s    DIIrR   c                X    t        |t              xr |j                  | j                  k(  S rh   )r>  rR  ra   )rd   r  s     rP   __eq__zCSEVariable.__eq__  s!    %-I%**		2IIrR   c                     y rh   r}   )rd   ra   rm   r  s       rP   update_on_argszCSEVariable.update_on_args  s    rR   c                N    | j                   j                   d| j                  dS rQ  )r  rw   ra   ri   s    rP   r  zCSEVariable.__repr__  s$    ..))*!DII=::rR   rh   )ra   ru   r  ValueRanges[Any]r   r   rt   )rr   r   )r  objectrr   r   )ra   ru   rm   r	   r  r	   rr   rs   )rw   rx   ry   rz   r   r  r  r  r  r  r  r  s   @rP   rR  rR    sH     (,	 ! %	J;rR   rR  AugmentedKeyT)default)boundr  .c                  >   e Zd ZdZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZddZddZddZ	dd	Z
dd
ZddZ ej                         dddd	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ ej                         df	 	 	 	 	 ddZ ej                         df	 	 	 	 	 	 	 ddZy)CSEz Common subexpression eliminationNc                    || _         || _        i | _        || _        |xs i | _        |xs i | _        |xs t        j                         | _        t               | _
        |xs i | _        y rh   )r   rY   _cachename_prefixstore_cachereduction_cache	itertoolsr   iter_buffer_idsr   invalidated_storesvarname_map)rd   r   rY   r  iter_buffersr  r  r  s           rP   r   zCSE.__init__  sm     FH&ALARPR!r 	 6B5VY__EV3=<7B7HbrR   c                6   g | j                   j                         D ]2  \  }}||vs| j                   |= | j                  j                  |       4 |r9| j                  j                         D ci c]  \  }}||v s|| c}}| _        y i | _        y c c}}w rh   )r  r  r  r  r  )rd   	keep_varsra   tmpr  r  s         rP   
invalidatezCSE.invalidate  s    44++1134 	2ID#)#$$T*''++D1	2 ,0KK,=,=,?RDAq1	>1a4RDKDK Ss   1B>Bc           	          t        |       | j                  | j                  | j                  | j                  | j
                  | j                  | j                        S )N)r   rY   r  r  r  r  r  )r?  r   rY   r  r  r  r  r  ri   s    rP   clonez	CSE.clone  sP    tDz;;;;((--(((( 00
 	
rR   c                    | j                         }t        | j                        |_        t        | j                        |_        t        | j                        |_        |S )zNReturn a copy of using ScopedDict so changes to *_cache aren't visible in self)r  r*   r  r  r  )rd   new_cses     rP   scoped_copyzCSE.scoped_copy  sH    **,#DKK0",T-A-A"B()9)9:rR   c                "    t        t        |      S )z@Override this method to augment cache key with backend specifics)r   r  rd   	cache_keys     rP   augment_keyzCSE.augment_key$  s    M9--rR   c                @    || j                   | j                  |      <   y rh   r  r  )rd   r  r  s      rP   putzCSE.put(  s    36D$$Y/0rR   c                <    | j                  |      | j                  v S rh   )r  r  r  s     rP   containszCSE.contains+  s    	*dkk99rR   c                X    | j                   j                  | j                  |      d       S rh   )r  r  r  r  s     rP   try_getzCSE.try_get.  s"    {{t//	:DAArR   c                >    | j                   | j                  |         S rh   r
  r  s     rP   r  zCSE.get1  s    {{4++I677rR   T)r  rb   
assignmentr   c          	        t        |t              r|j                  }|s|sJ t        |t              rE|j                  j                  |      |_        |xj                  dz  c_        t        t        |      S t        |t              r|j                         }n1t        |t              r|j                  }nt        |t              sJ |}| j                  |      }|s| j                  ||      }| j!                  ||       |rt"        j$                  j&                  r+t"        j$                  j&                  j)                  |d       t        |t              rP|r |j+                  | j,                   | d       |j/                  |       |j+                  | j0                         |S t        |t              rM|sJ |j+                  |j3                  | j,                   | d|j                   | j0                                |S |r | j,                   | d| | j0                   }	n| | j0                   }	|j+                  |	       |rPt4        j6                  j8                  st4        j6                  j:                  r|t=               dk7  rt?        |||       |S |j                  j                  |      |_        |xj                  dz  c_        |S )NrD   T)	only_oncez =z = rY  ) r>  r2   rc   rR  r  tightenr  r   r  r(   getvaluer%   ra  ru   r  newvarr  r5   rZ  current_nodecodegen_originating_infor  r   splicerY   re  r   r  r  r  r'   r  )
rd   r   r   r  rb   r  r   r  r  ra  s
             rP   generatezCSE.generate4  s\    dH%::D
""dK( ++--f5DKNNaN..n-I./		IdC(((Ill9%++fe,CHHY$88((HH))BB$ C  dN3!((DKK=R)@AMM$'$$T[[1: 
9  &67%%:$$$++se3tyyk$++'WX4 
- ""&++se3tfT[[MJ"&}5$$T* #"//KK%22JJ!-/1U:#FC7 
 ++F3CJMMQM
rR   c                    | j                    t        | j                         }t        j                  j                  |||      }|| j                  |<   |S rh   )r  r   r  r5   rZ  create_cse_varr  )rd   r  r   var_namer  s        rP   r  z
CSE.newvar|  sR    
 &&'T-A-A(B'CDhh%%h>%("
rR   c                    t        j                  | j                  vfd       t        j                  j                  ||      }|| j                  <   |S )Nc                     d  S )Nzduplicate name: r}   r\  s   rP   r_  zCSE.namedvar.<locals>.<lambda>  s    4DTF2K rR   )r   _check_valuer  r5   rZ  r  )rd   ra   r  r   r  s    `   rP   namedvarzCSE.namedvar  sS     	(((*K	
 hh%%dFE:!$
rR   )r  r  r  NNNN)r   ru   rY   ru   r  ru   r  zOptional[itertools.count[int]]r  z.Optional[MutableMapping[str, CSEVariableType]]r  z<Optional[MutableMapping[ReductionCacheKey, CSEVariableType]]r  z$Optional[dict[str, CSEVariableType]])r  zOrderedSet[CSEVariable]rr   rs   rr   r   )r  ru   rr   r  )r  ru   r  r  rr   rs   )r  ru   rr   r   )r  ru   rr   zOptional[CSEVariableType])r  ru   rr   r  )r   r(   r   zCUnion[str, CSEVariable, OpsValue, IndentedBuffer, DeferredLineBase]r  r  rb   r   r  r   r   r   rr   r  )r  r  r   r   rr   r  )ra   ru   r  r  r   r   rr   r  )rw   rx   ry   rz   r   r  r  r  r  r  r  r  r  r   unknownr  r  r!  r}   rR   rP   r  r    sw   *  7;FJ <@II I 	I
 5I DI
I :I0	
.7:B8 $7;#6#6#8'+FF RF
 !F F F %F 
FT $7;#6#6#8'+  % 
	 $7;#6#6#8'+	 ! %	
 
rR   r  c                  0     e Zd Zd fdZddZddZ xZS )CodeGenc                T    t         |           t        j                         | _        y rh   )r  r   rq  	ExitStack
exit_stackrd   r  s    rP   r   zCodeGen.__init__  s    $..0rR   c                :    | j                   j                          | S rh   )r(  	__enter__ri   s    rP   r+  zCodeGen.__enter__  s    !!#rR   c                >    | j                   j                  |||       y rh   )r(  __exit__)rd   exc_typeexc_valexc_tbs       rP   r-  zCodeGen.__exit__  s      7F;rR   rq   r"  r.  r	   r/  r	   r0  r	   rr   rs   )rw   rx   ry   r   r+  r-  r  r  s   @rP   r%  r%    s    1<rR   r%  c                      e Zd ZU dZded<   dZded<   dZded<   	 d 	 	 	 	 	 d! fdZej                  d"d	       Z
ej                  	 	 d#	 	 	 	 	 	 	 d$d
       Zd%dZd%dZd&dZ	 d'	 	 	 	 	 	 	 	 	 d(dZ	 	 	 	 	 	 	 	 	 	 d)dZ	 	 	 	 	 	 	 	 d*dZ	 	 	 	 	 	 	 	 	 	 d+dZd,dZ	 	 d#	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d-dZed.d       Z	 d'	 	 	 	 	 	 	 	 	 d/dZ	 	 	 	 	 	 	 	 	 	 d0dZd1dZd2 fdZd3 fdZd4dZd5dZd5dZ	 	 	 	 d6dZd7dZ d8dZ! xZ"S )9Kernelr  ru   newvar_prefixrY   Nz'Optional[Callable[[], OpsHandler[Any]]]	overridesc                   t         |           |rt        xj                  dz  c_        |xs
 t	               | _        t               | _        t               | _        t               | _	        d| _
        d| _        t        | j                  | j                        | _        t!               | _        t!               | _        d | _        d | _        d | _        d | _        t!               | _        t!               | _        i | _        d| _        d | _        y )NrD   r   )r  r   r    generated_kernel_countr  rm   r(   loadscomputestoresnum_loadnum_reductionr  r4  rY   cser   must_keep_buffersstore_buffer_names
_load_mask_load_otherr  node_to_boundsrY  r[  inplace_update_buffersmin_elem_per_threadkernel_name)rd   rm   increase_kernel_countr  s      rP   r   zKernel.__init__  s     	 **a/*(JL	#%
%'$&.1$2D2Ddkk.R2<,3=<)-4859OS0:3=<
 79##$ *.rR   c              #     K   | j                   }|| _         |j                  j                         j                         | _        	 d  || _         y # || _         w xY wwrh   )r  r  r  
get_boundsrB  )rd   r  priors      rP   set_current_nodezKernel.set_current_node  sO     !! "jj//1<<>	& %DDs   AAA A	AAc              #    K   ||}|d u x}r
t               }| j                  }| j                  }| j                  }| j                  }|| _        || _        || _        |j                         | _        	 d  || _        || _        || _        || _        |r
|rJ d       y y # || _        || _        || _        || _        |r
|rJ d       w w xY ww)Nz$unexpected store inside swap_buffers)r(   r8  r9  r:  r=  r  )	rd   lbcbsbdisallow_storesr8  r9  r:  r=  s	            rP   swap_bufferszKernel.swap_buffers  s      :B Dj(?(!B

,,hh
??$	FDJ"DL DKDHEEEv2 	 DJ"DL DKDHEEEv2 s   A/C2B 6)C*C		Cc                    t         rh   r   rz  s      rP   r  zKernel.load  r  rR   c                    | j                   }	 | j                  | _         | j                  ||      || _         S # || _         w xY w)z+A load the depends on an index we have read)r8  r9  r  )rd   ra   rz  rI  s       rP   indirect_loadzKernel.indirect_load  s8    

	DJ99T5)DJDJs	   "8 	Ac                    t         rh   r   r~  s       rP   r  zKernel.store_reduction  r  rR   c                    t         rh   r   r|  s        rP   r  zKernel.store  
     "!rR   c                    t         rh   r   r  s        rP   r  zKernel.reduction  
     "!rR   c                    t         rh   r   r  s       rP   r  zKernel.scan  s
     "!rR   c                    t         rh   r   r  s        rP   r  zKernel.sort  rX  rR   c                    t         rh   r   ri   s    rP   
var_rangeszKernel.var_ranges   r  rR   c                    t         )z3
        See [Note: Inductor bucketize op]
        r   r  s           rP   r  zKernel.bucketize#  s
     "!rR   c                    t         rh   r   ri   s    rP   assert_functionzKernel.assert_function2  s    !!rR   c           	     v   t        |t              rt        |      }t        |t              sJ t        |             |t        |t              sJ |t        |t              sJ |r|rd| d| d| d| d	}| d| d| }n|r
| d| }|}n|sJ | d| }|}|r	d| d| d}| j                   d| d| dS )	NrH  z <= z) & (z < r   z) | ~(z, "index out of bounds: z"))r>  rR  ru   r?  r_  )rd   r  rv  rw  maskr>  
cond_prints          rP   indirect_assertzKernel.indirect_assert6  s    c;'c(C#s#.T#Y.#}
5# 666}
5# 666U ugT#eC5E7!<D!7$se3ug6JWD&DJL5U#eW%DJtfF4&*D&&'q.FzlRTUUrR   c                    t         rh   r   ru  s        rP   rx  zKernel.check_boundsT  rV  rR   c                    t         rh   r   r  s     rP   index_to_strzKernel.index_to_strY  r  rR   c           	     (   t         |           | j                  sJ | j                  j	                  t        j                  t        | | j                                            | j                  j	                  t        j                  |              | S rh   )	r  r+  r5  r(  enter_contextr5   set_ops_handlerCSEProxyset_kernel_handlerr)  s    rP   r+  zKernel.__enter__\  sl    ~~~%%htT^^-=>?	
 	%%a&:&:4&@ArR   c                H    | j                          t        | 	  |||       y rh   )remove_kernel_local_buffersr  r-  )rd   r.  r/  r0  r  s       rP   r-  zKernel.__exit__e  s     ((*7F3rR   c                   t         j                  j                  syt        fd| j                  D              }t               | j                  D ]N  }|| j
                  vs|| j                  j                  vs+j                  ||      s>j                  |       P D ]  }|| j                  j                  v rw| j                  j                  |   }t        |t              rEt        fd|j                  D              }|r| j                  |       | j                   j                  |       | j#                  |        y)z
        Any buffers that are both created and have a last use in the
        same kernel can be removed.

        Note that V.graph.scheduler can be None when codegening triton template
        kernels.
        Nc              3  t   K   | ]/  }|j                   v rj                   |   j                          1 y wrh   )name_to_bufdefining_op_name)r  r  r  s     rP   r  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>t  s;      &
i+++ !!#&779&
s   58c              3  &   K   | ]  }|v  
 y wrh   r}   )r  r  names_to_removes     rP   r  z5Kernel.remove_kernel_local_buffers.<locals>.<genexpr>  s     KaQ/1KrW  )r5   r   r  r   r?  r>  rm   r  $can_buffer_be_removed_through_fusionr  r  r>  r}  r  rv  remove_inplace_bufferr[  remove_buffer)rd   fused_node_namesra   r  r`   rs  r  s        @@rP   rm  z"Kernel.remove_kernel_local_buffersi  s%    GG%%	% &
..&
 

 ,6<++ 	*DD222		 7 77BB*  ##D)	* $ 
	)Dtyy000ii//5c:.K3??KK..t4''++D1""4(
	)rR   c                    t         j                  d|       t        | j                  j                  |<   | j
                  j                  |       y )Nzremove_buffer(%r))r  rN   r  rm   r  rY  r  r   s     rP   rv  zKernel.remove_buffer  s;     			%t,)0		  &  &rR   c                    t         j                  d|       t        | j                  j                  |<   | j
                  j                  |       y )Nzremoving_inplace_buffer(%r))r  rN   r  rm   r  rY  r  r   s     rP   ru  zKernel.remove_inplace_buffer  s9    		/6*1		!!$'  &rR   c           	        t        |t        t        f      r|D cg c]  }| j                  |       c}S t        j
                  j                  j                  |      }t        |j                  d       }|D ci c]W  }t        |t        j                  t        j                  t        j                  f      r|| j                  j!                  |      Y }}t#        ||      S c c}w c c}w )Nc                    | j                   S rh   r\  )ss    rP   r_  z(Kernel.rename_indexing.<locals>.<lambda>  s
    !&& rR   )r  )r>  listtuplerename_indexingr5   r   r  r  sortedfree_symbolsr   r   UNBACKED_INTSIZEPRECOMPUTED_SIZErm   r   r-   )rd   rz  r  sorted_symbolsreplacementss        rP   r  zKernel.rename_indexing  s    
 edE]+5:;D((+;;  ))%0 2 28HI $
%%II)) tyy~~a  
 
 %.. <
s   C%;AC*c                    t        |i |S rh   )rR  )rd   rm   r  s      rP   r  zKernel.create_cse_var  s    D+F++rR   c                Z    |y| j                   j                  |j                               S )zC
        Returns arg name of a given input or output node.
        N)rm   r  r   )rd   r  s     rP   r  zKernel.arg_name  s'     <yy!!$--/22rR   )NT)rm   zOptional[KernelArgs]rF  r   rr   rs   )r  rC   rr   rp  r  )rL  r(   rM  Optional[IndentedBuffer]rN  r  rr   rp  ra   ru   rz  r   rr   rR  ra   ru   rz  r   rc   rR  rr   rs   rh   
ra   ru   rz  r   rc   rR  rX   r4   rr   rs   
r   r   r  r   r  r3   rc   +Union[CSEVariable, tuple[CSEVariable, ...]]rr   r  r  r  r  zUCallable[[tuple[CSEVariable, ...], tuple[CSEVariable, ...]], tuple[CSEVariable, ...]]r  tuple[CSEVariable, ...]rr   r  
r  r  r  r  r  r   r  r   rr   r  )rr   zdict[sympy.Symbol, sympy.Expr]r  rR  r  r  r  rR  r  r   r  r   r  r  r  zOptional[CSEVariable]rr   rR  rt   )
r  zUnion[CSEVariable, str]rv  r   rw  r   ra  z!Optional[Union[CSEVariable, str]]rr   ru   r  )rz  r   rr   ru   r"  r1  rq   ra   ru   rr   rs   )rz  z;Union[list[sympy.Expr], tuple[sympy.Expr, ...], sympy.Expr]rr   r   )rm   r	   r  r	   rr   rR  )r  r>   rr   r   )#rw   rx   ry   r4  r{   rY   r5  r   rq  rr  rJ  rP  r  rS  r  r  r  r  r  r\  r  r|   r_  rc  rx  rf  r+  r-  rm  rv  ru  r  r  r  r  r  s   @rP   r3  r3    s   M3FC9=I6= PT /( /HL /	 /D & &  (,'+	FF %F %	F
 
F F8"" SW"" *"3>"FO"	"
"" " &	"
 ;" 
5""'"
" (" 
!""'" (" 	"
 " 
!"" 4804"" C" &	"
 $" " 1" ." 
" " " 37V$V V 	V
 0V 
V<""&0"9="FJ"	"
"4%)N''
/P/	/.,3rR   r3  c                  8    e Zd ZU dZded<   dZded<   dZded	<   y)
r  r  zClassVar[str]r  Nr   r   r  ru   ops_name)rw   rx   ry   r  r{   r   r  r}   rR   rP   r  r    s!    "C"#'E 'HcrR   r  c                 b    	 dd l } | j                  | j                        S # t        $ r Y y w xY w)Nr   )	undefined)jinja2EnvironmentStrictUndefinedImportError)r  s    rP   
jinja2_envr    s?    !!,, " 
 	
  s   " 	..c                      e Zd ZdZe	 d		 	 	 	 	 	 	 d
d       Zedd       Ze	 	 	 	 dd       ZddZ	 	 	 	 	 	 ddZ	ddZ
y)KernelTemplatezg
    Base class for defining kernel templates.

    Children classes: TritonTemplate, CUDATemplate
    c                    | j                  d      }t        |      dkD  r|dd  D cg c]  }d|z  |z  |z    c}|dd  dj                  |      S c c}w )NTrD   r  r  )
splitlinesr  r   )sourcenum_indentsindents_spacinglinesra  s        rP   indent_except_firstz"KernelTemplate.indent_except_first  sd     !!$'u:>INqrAE&4<E!"I wwu~s   Ac                    t               }|y t        j                  |j                  d<   ddlm} 	 |j                  |       S # |$ r} G d d|      } ||      |d }~ww xY w)Nr  r   )TemplateSyntaxErrorc                  (     e Zd Zd fdZddZ xZS )IKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxErrorc                    t         |   |j                  |j                  |j                  |j
                         || _        y rh   )r  r   messagelinenora   filenameoriginal_error)rd   r  r  s     rP   r   zRKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__init__  s>    G$&..&--&++&//	 +9D'rR   c                F   d| j                    d}|d| j                   dz  }t        | j                  d      r| j                  j                  j                  d      }|dz  }t        d| j                   dz
        }t        t        |      | j                   dz         }t        ||      D ]s  }|| j                   dz
  k(  rN||dz    d	||    dz  }t        | j                  d
      s=|dd| j                  j                  dz
  z  z   dz   z  }c||dz    d||    dz  }u |S )NzError in template at line 
zError message: r  z	Context:
r   r   rD   z: --> columnz     r  z^
z:     )r  r  r  r  r  splitmaxminr  rl  r  )rd   
error_infor  startendrK  s         rP   r  zQKernelTemplate._template_from_string.<locals>.DetailedTemplateSyntaxError.__str__  sA   #=dkk]"!MJODLL>"DDJt22H= $ 3 3 : : @ @ F"l2
 #At{{Q 7!#e*dkkAo>!&uc!2 
KA DKK!O3 *QveAhZr.J J
#*4+>+>#I$.(/*-1D1D1K1Ka1O*P)Q*/)0%&J !+QveAhZr.J J

K &%rR   )r  r  rr   rs   rt   )rw   rx   ry   r   r  r  r  s   @rP   DetailedTemplateSyntaxErrorr    s    9&rR   r  )r  r  r  filtersr  r  from_string)r  envr  er  s        rP   _template_from_stringz$KernelTemplate._template_from_string  sj    l;-;-O-O)*.#	8??6**" !	8&.A &> .a0a7C!	8s   A A!AA!c                   t         j                  j                  t        | t        t
        f      r.| D ci c]!  }|j                         |j                         # c}n | j                         | j                         idfd}|S c c}w )Nc                >    j                  |       }||S  |       S rh   )r  )ra   r  _get_dtype_reallookups     rP   r   z1KernelTemplate._fake_get_dtype.<locals>.get_dtype	  s'    ZZ%F!"4((rR   )ra   ru   rr   r   )r5   r   r   r>  r}  r~  r   )	fake_outsr  r   r  r  s      @@rP   _fake_get_dtypezKernelTemplate._fake_get_dtype	  sr     ''++i$/AJK#cllncmmo5KF((*I,?,?,ABF	)  Ls   &B
c                    || _         y rh   r\  r   s     rP   r   zKernelTemplate.__init__%	  s	    	rR   c                   	 |j                   | j                  di |       y# t        $ rQ}t        j	                  d|t        |       t        j                         t        j                  k         |cY d}~S d}~ww xY w)a%  
        Maybe generates a new ChoiceCaller and appends it into existing choices.
        Returns None if success, otherwise returns the error.

        choices: A list of ChoiceCallers.
        kwargs: Additional kwargs to be passed to self.generate() to generate a new ChoiceCaller.
        Nz3Cannot Append Choice: %s. KernelTemplate type is %s)
stack_infor}   )	r  r  r   r  infor?  getEffectiveLevelrL   INFO)rd   choicesr  r  s       rP   maybe_append_choicez"KernelTemplate.maybe_append_choice(	  sn    
	NN=4==2623" 	HHET
002W\\A	   H	s   !$ 	A>AA93A>9A>c                    t         )zM
        Generates a ChoiceCaller instance from the given arguments.
        r   )rd   r  s     rP   r  zKernelTemplate.generate?	  s
    
 "!rR   N)   )r  ru   r  r   r  r   rr   ru   )r  ru   rr   r	   )r  zUnion[list[Buffer], Buffer]rr   zCallable[[str], torch.dtype]r  )r  rv   r  r	   rr   zOptional[NotImplementedError])r  r	   rr   r<   )rw   rx   ry   rz   r   r  r  r  r   r  r  r}   rR   rP   r  r    s     >?"%8;	  *8 *8X .	% " ,/	&."rR   r  c                  "    e Zd Zd Zd fdZddZddZ	 	 d	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZddZ	ddZ
	 d	 	 	 	 	 	 	 	 	 ddZdd	Z	 	 	 	 	 	 	 	 	 	 dd
Z	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ xZS )rj  c                b    t         |           ddlm}  |       | _        || _        || _        y )Nr   ValueRangeAnalysis)r  r   r  r  vr_analysisrZ  parent_handler)rd   rZ  r  r  r  s       rP   r   zCSEProxy.__init__J	  s+    /-/,rR   c                  	
  | j                   gi 	 t        | j                        i }t               }t	               d 
dk(  rdk(  r|j
                  
nodk(  rRdk(  rMt        j                  j                  j                  j                  t        j                  d       j
                  
ndv rt        |      } |i 
dv r
J dd	
fd}t        j                  ||      S )	Nmaskedr[  rY  )r[  rY  re  )r[  rY  r   c                   t        t        t        f      r	   n}	dz  	dk(  r#t        | t              r| j                  || _        t
        j                  j                  j                  t
        j                  j                  |       }|j                         t        j                  j                  st        j                  j                  r)|J t        t
        j                  j                  ||       |S )NrD   rY  )r  r   )r>  r}  r~  rR  r   r5   rZ  r=  r  r9  r  r   r  r  r  r  )
r  	var_dtypecsevarrm   r  r  r  ra   r  
output_idxs
      rP   do_csez!CSEProxy._default.<locals>.do_csek	  s    
 lT5M: Z(! 
 !OJ %Jq+$>177?#XX\\**  "	 + F !!$f5 ##??&&>> ,,,AHH,,fi@MrR   )r  ru   rr   rR  )_bound_variabler  r  r!   r'   r   r5   interpreterr  r  r  r  r  pytreetree_map)rd   ra   rm   r  rc   dtype_handlerdtype_opr  r  r  r  r  s    ```    @@@@rP   _defaultzCSEProxy._defaultR	  s	   %%%d<T<V<2++T2DCFC24%'88 3 ;;LX'U"2==55::>>#''e  00}d3H#T4V4L''+++
	 	> vu--rR   c                z  	 ddl m} ddlm} ddlm} t        t        j                  |      rt        j                         S t        t        j                  |      rt        j                         S t        j                  j                  		j                  |k(  r| j                  j                  t        | j                  j                  t              s$J t!        | j                  j                               | j                  j                  j#                  	t        j                               S t$        j&                  rjt)        ||      r^t+        	fddD              rt        j                         S |rJ d	d}t-        t/        ||            } t1        | j2                  |      | S t        j                         S )
z
        If the variable comes from an FX node, we forward the bound we have already computed
        Else, if the variable when codegen'ing another op, we try to compute its bounds
        r   r  )TritonTemplateKernelrD   )CUDATemplateKernelc              3  :   K   | ]  }|j                   v   y wrh   )r  )r  r|  fx_nodes     rP   r  z+CSEProxy._bound_variable.<locals>.<genexpr>	  s     V11&Vs   )set_indirectr  r  c                    t        | t              r| j                  S t        | t        j                        rt        |       S | S rh   )r>  rR  r  r   r  r   r  s    rP   arg_to_boundz.CSEProxy._bound_variable.<locals>.arg_to_bound	  s2    a-88O5::.&q>)HrR   )r  r	   rr   r	   )r  r  select_algorithmr  cuda.cuda_kernelr  r>  r5   rZ  r   r#  r  r  r  rB  dictr?  r  r   compute_all_boundsr  rX  r}  r  r  r  )
rd   ra   rm   r  r  r  r  r  
arg_boundsr  s
            @rP   r  zCSEProxy._bound_variable	  sP   
 	0;8ahh 45&&((ahh 23&&((--,,>>T!dkk&@&@&Ldkk88$? **B ? ;;--11';;N;N;PQQ&&73Et+L V0UVV"**,, : c,56J274++T2J??""$$rR   c                $   t        |t              rt        j                  |      }t        |t        j                        sJ t        |      |f       |j                  j                  dk  r|rt        j                  |t        j                  |t        j                              }|j                  j                  dk\  r0t        j                  |d      }t        j                  |||      }n|}t!        j"                         }|j                  t!        j"                         k7  rt        |t        j$                        r|j                  t!        t&         d      z  }t!        |j                  |z   |j                  |z         }|j                  j                  dk\  r"|j                  t!        dt&              z  }	||	z  }| j(                  j*                  j-                  | j(                  j.                  ||      }| j0                  j3                  |||      }
t5        |      ro|j                  j                  dk\   }t        |t        j$                         xs |j                  j                  |k   }| j(                  j7                  |
|||       |
S )Nr   r  )r  )r>  r   r   rn  r  r?  r  rv  r0   r  r  r   longrw  ltr<  r   r#  Numberr   rZ  r=  r  r9  r  rr  r&   rx  )rd   r  r   rp  rq  stmr  
new_bounds
neg_boundspos	sympy_varassert_lowerassert_uppers                rP   rr  zCSEProxy.indirect_indexing	  s    dC ==&D$

+?d4j$-??+ ::aggc3>>$

#CD::##q(QB))BS1C %,,.Jzz[0022z$7U !ZZ+vgr*BB
($$t+Z-=-=-D
 ::##q(**{1f'==C!+c!1J++//**4;;+>+>J*WC''99#tUK	5! #

 0 0A 56L)$== 

  4'BL KK$$YlLQrR   c                >    | j                   j                  ||||      S rh   )rZ  rx  ru  s        rP   rx  zCSEProxy.check_bounds	  s     {{''dE5AArR   c                   || j                   j                  j                  v r)t        j                   j                  j                  |       t        |t        j                        r| j                   j                  ||      S | j                   j                  j                  }||v r||   S | j                   j                  ||      }|j                  dk(  r| j                   xj                  dz  c_        |S r   )rZ  r=  r  r5   r>  r  r   r   TMPrS  r  r  r  r;  )rd   ra   rz  r  outs        rP   r  zCSEProxy.load	  s    4;;??555 HH&&**40udhh/;;,,T599kkoo11;t$$kktU+ ==AKK  A% 
rR   c                l   || j                   j                  j                  |<   | j                   j                  r{|t        j
                  j                  v r^| j                   j                  j                  |      }|j                         D ]%  }|| j                   j                  j                  |<   ' y y y rh   )	rZ  r=  r  r  r5   r   name_to_buffer
get_outputget_mutations)rd   ra   rc   r  
other_names        rP   _update_store_cachezCSEProxy._update_store_cache 
  s    ,1##D);;##0F0F(F++**55d;C!//1 @
:?++J7@ )G#rR   c                    | j                   j                  j                  |       || j                  ||       |t        j
                  j                  vr | j                   j                  ||||       y y )N)rX   )rZ  r?  r  r  r5   r   rY  r  r|  s        rP   r  zCSEProxy.store
  sc     	&&**40<$$T51qww...KKdE5t< /rR   c                    | j                   j                  j                  |       | j                  ||       |t        j
                  j                  vr| j                   j                  |||      S y rh   )rZ  r?  r  r  r5   r   rY  r  r~  s       rP   r  zCSEProxy.store_reduction
  sZ    &&**40  u-qww...;;..tUEBB /rR   c                |    | j                   xj                  dz  c_        | j                   j                  ||||      S r   )rZ  r<  r  r  s        rP   r  zCSEProxy.reduction
  s4     	!!Q&!{{$$UI~uMMrR   c                <    | j                   j                  |||      S rh   )rZ  r  r  s       rP   r  zCSEProxy.scan!
  s     {{
F;;rR   c                >    | j                   j                  ||||      S rh   )rZ  r  r  s        rP   r  zCSEProxy.sort,
  s     {{
CCrR   c           	     D    | j                   j                  |||||||      S )a  
        [Note: Inductor bucketize op]

        Inputs:
        -------
        values: the values to be bucketized.
        boundaries: a tuple containing
          (a) the name of the boundaries tensor (which must be sorted, unless
          the sorting tensor is present),
          (b) the length of the tensor in the last dimension (i.e. the length of
          one set of boundaries),
          (c) the number of elements in the underlying storage (i.e. the length
          of the flattened tensor, ignoring striding), and
          (d) the stride of the tensor in the last dimension.
        boundary_indices: indices into a flattened version of the boundaries
        tensor, of the same size and shape as "values".  Each index points to
        the first element in the set of boundaries to be used for the
        corresponding value.
        indexing_dtype: the dtype to use when indexing into the boundaries
        tensor.  This must be int64 or int32.  This additionally specifies the
        dtype of the return value.
        right: see "Details" below.
        sorter: an optional tuple containing
          (a) the name of an optional sorting tensor, used to access unsorted
          boundaries without reordering the boundaries tensor, and
          (b) the stride of the tensor in the last dimension.
        The values in the sorting tensor are used as indices into the *last*
        dimension of the boundaries tensor, with all other indices matching.
        The size of the sorting and boundaries tensors must be equivalent.
        sorter_indices: must be present if the sorting array is present; see
        "boundary_indices" for the equivalent definition for the boundaries
        tensor.

        Output:
        -------
        The buckets each value belongs in, within a given set of boundaries.  0
        indicates a position before the first boundary, and len(boundaries_set)
        represents a position after the last boundary.

        Details:
        --------
        Given a value and a set of boundaries, calculate the bucket that each
        value belongs to.  This works differently in 1-D and N-D cases.

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [0, 4, 4, 8], right=True
        return =   [[ 0, 1, 1, 1], [1, 3, 3, 4]].

        for values [[-1, 0, 1, 2], [3, 4, 5, 9]], boundaries [[0, 4], [4, 8]], right=True
        return =   [[ 0, 1, 1, 1], [0, 1, 1, 2]]

        Note that in the N-D boundaries case, the shape of "values" and
        "boundaries" must match in every dimension _except_ the last.

        When right == False, bucket i refers to range (boundaries[i], boundaries[i+1]].
        When right == True,  bucket i refers to range [boundaries[i], boundaries[i+1]).

        Boundaries must be non-decreasing, or a sorter must be provided which
        would re-index offsets in a non-decreasing order (e.g. the second output
        of torch.sort(offsets)).  Otherwise, the result is undefined.
        )rZ  r  r  s           rP   r  zCSEProxy.bucketize5
  s1    L {{$$
 	
rR   )rZ  zKernel[Any]r  zOpsHandler[Any])ra   ru   rm   ztuple[Any, ...]r  zdict[str, Any]rr   r	   )ra   ru   rm   r	   r  r	   rr   r  r  )
r  rR  r   r  rp  r   rq  r   rr   r  r  r  )ra   ru   rc   rR  rr   rs   rh   r  r  r  r  r  r  r  )rw   rx   ry   ra   r   r  r  rr  rx  r  r  r  r  r  r  r  r  r  r  s   @rP   rj  rj  G	  s   D-8.t+%b // %/ 	/
 / 
/bBB&0B9=BFJB	B
"@ SW== *=3>=FO=	=CNN N &	N
 ;N 
5N	<'	<
	< (	< 
!	<D'D (D 	D
 D 
!D  4804N
N
 CN
 &	N

 $N
 N
 1N
 .N
 
N
rR   rj  )rO   ru   rr   rs   r  )r   ru   r+  r   r,  r   r-  r   r.  Optional[CustomGraphModulePass]rr   rs   )r   Union[torch.device, str, None]rr   zOrderedSet[BackendFeature])r   r  rE  r1  rr   r   )r   ru   rr   zOptional[SchedulingConstructor])F)r   ru   rI  r   rr   r   )r   ru   rr   r
  rq   )rz  Sequence[sympy.Expr]r{  r  r|  r  rr   r   )r   ru   r  r   rr   rs   )r   ru   rr   r   )r  ru   rm   r	   r  r	   rr   r   )r   r(   r  r  r   r   rr   rs   )rJ  ru   rr   r   r}   r  )rr   r	   )
__future__r   r]   rq  dataclassesenumr  r  rL   r  r  r_   rer[   abcr   r   r   r   r   typingr	   r
   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   torch.fxtorch._prims_commonr   torch.utilsr   r  torch.utils._ordered_setr   torch.utils._sympy.numbersr   torch.utils._sympy.printersr   _PythonPrintertorch.utils._sympy.symbolr   r   r   torch.utils._sympy.value_rangesr   r   r  r   r    dtype_propagationr!   ops_handlerr"   r#   utilsr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   virtualizedr0   r1   r2   r3   r4   r5   collections.abcr6   r7   r8   r9   custom_graph_passr:   r   r;   r<   r=   r>   r  r@   r  rA   rB   rC   ro  rF   rG   r   r?  r   ru   r  r  rC  _logginggetArtifactLoggerrw   rJ   	getLoggerr  rQ   	dataclassrT   r   r   r   r   r   r   r   r   r   KernelArgTyper   r{   r   r(  r)  r/  r1  rA  rF  r@  rK  rM  cacher=  r}  r  r  bfloat16r  float16r   r  float64int8int16r  r  r   uint16r  uint64r  r  r  r  r  compile
IGNORECASErS  rM  rO  r  r  INT_TO_FLOATr  r]  r_  rg  ru  rx  r}  r  r  rR  r  r  r~  r   ReductionCacheKeyr  r%  r3  r  r  r  rj  r   s   0rP   <module>r4     s   "          	 	  #  
 
 
 ,    ? ) / - G O O D  : ;    Q P BB$9>>$DD-	B$hy&9%:N%JK23sELL()J F~~//*Eg!=
   >/		 /(C  Ta= a aH* * # # #       ! ! ! = = = lIw8H,VW,.) .5" 5"p :< 6 ;DF A F8 @D:>
7
7,
7 /
7 !=	
7
 8
7 

7
&T 
&3*33$3*35C3	3U
 &+

"
!
V K K\UU$U  U 	U;;&7;	;, 
NNEKK	MM5;;> JJMMMMJJKKKKKKKKLLLLLL
 	u> : ,''' ' 	'T::!0:9D:	:2aB aBH%N %S1 S1l "rzz";2==Q D;#%5z# D;N - - -  6: `6;HH-`6 ;HH/- 	`6 ;HH/- 	`6$ ;HH/- 	%`60 ;HH/- 	1`6< ;HH*)	=`6L ;HH(0	M`6X 	;HH66>	Y`6h ;HH1i`6r ;HH2s`6| ;HH1}`6F ;HH2 G`6P ;HH%8$Q`6^ 	;HH&%		_`6j ;HH%8	k`6v 	;HH&	w`6@ ;HH+A`6L %;HH88)	M`6X %;HH88)	Y`6d %;HH8)e`6n %;HH8)o`6z 
;HH'
{`6D ;HH(E`6N ;HHc	O`6^ ,;HH?0_`6h ,;HH?0i`6t &;HH9*u`6~ 
;HH*
`6H );HHD-I`6R );HHD-S`6\ );HHD-]`6f );HHD-g`6p (;HHC,q`6z $1;HHL5${`6D $1;HHL5$E`6N $1;HHL5$O`6X $1;HHL5$Y`6b ';HHB+c`6l (;HHC,m`6v (;HHC,w`6 2 `F	-# -"> *Z 
 N N N 
 ,J JZ
!; !;H 5+;Tk5c!1223	5e'/=0
1 eP
< 
<\3Wgo. \3~     p" p"f|
~ |
O8s   7
e