
    rhH                      U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZmZ d dlmZmZ d dlmZ d dlmZ d dl
mZmZmZmZmZmZmZmZmZ d dlmZmZm Z  d d	l!m"Z" d dl#Z#d d
l#m$Z$m%Z%m&Z& d dl'm(c m)c m*Z+ d dl,m-c m.Z/ d dl0Z1d dl2Z1d dl3m.c m4Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZBmCZC d dlDmEZE d dlFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZP d dlQmRZR d dlSmTZTmUZUmVZV d dlWmXZX ddlYmZZZm[Z[ ddl\m]Z]m^Z^m_Z_m`Z` ddl[maZambZbmcZcmdZdmeZe ddlfmgZg ddlhmiZimjZjmkZkmlZl ddlmmnZn ddlompZpmqZq ddl.mrZrmsZsmtZtmuZumvZvmwZwmxZxmyZymzZzm{Z{m|Z|m}Z}m~Z~mZmZmZmZmZmZmZmZ ddlmZmZmZ erd dlmZ d d lmZ dd!lmZ dd"lmZ dd#l.mZ neZd$ed%<   	 d dlZej.                  Zd&Z ed(      Z ed)      Z ed*      Zeee$f   Zd$ed+<   eeee$f   Zd$ed,<    ejD                  e      Z ej&                  ejJ                  d-.      Ze1j                  jL                  Z	 eed/eed/f   d0d1eeeeeed/f   d/d0d1f         f   Zd$ed2<   dd3Z ejV                  d&4       G d5 d6             Zdd7Zdd8Zdd9Zdd:Z	 	 	 	 	 	 dd;Zdd<Zg d=Zg d>Z	 d	 	 	 	 	 dd?Zdd@Z	 d	 	 	 	 	 ddAZedddB       ZedddC       Z	 d	 	 	 	 	 d dDZ	 	 	 	 d!dEZ	 	 	 	 d"dFZd#dGZd#dHZd$dIZ	 	 	 	 	 	 	 	 d%dJZ	 	 	 	 	 	 d&dKZd'dLZd(dMZ G dN d1      Z e~d'4       G dO dP             Ze~ G dQ dRe«             ZĐd)dSZe~ G dT dUeī             Ze~ G dV dWeƫ             Z edX       edY       edZ       ed[       ed\       ed]      d^Zd_ed`<   	 d	 	 	 	 	 	 	 d*daZe~ G db dceī             Zeee$   ee$   gef   Z G dd deeʫ      Z G df dge̫      Z G dh die̫      Ze~ G dj dkeī             Ze~ G dl dmeϫ             Ze~ G dn doeī             Zѐd+dpZҐd+dqZ	 	 	 	 	 d,	 	 	 	 	 	 	 	 	 	 	 	 	 d-drZ	 	 	 	 	 	 d.dsZՐd/dtZe~ G du dve«             Ze~ G dw dxe׫             Ze~ G dy dze׫             Ze~ G d{ d|e׫             Ze~ G d} d~e׫             Ze~ G d de۫             Ze~ G d de׫             Ze~ G d de׫             Z G d deܫ      Ze~ G d de«             Ze~ G d de             Ze~ G d de             Z	 	 	 	 	 	 d0dZd1dZ G d d      Ze~ G d de             Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Ze~ G d de             Z G d de      Z e~d'4       G d dee^             Z e~d'4       G d deeë             Z G d de      Z G d de      Z G d de      Ze~ G d de«             Ze~ G d de«             Z e~d'4       G d de             Z G d de      Z G d de      Zeeeeeeeeeeef      f   Z G d d      Z G d de      Z G d de      Z G d de      Z G d de      Z e~d'4       G d de             Z  G d de       Z G dÄ dĐe      Z e~d'4       G dń dƐe              Z e~d'4       G dǄ dȐe             Z G dɄ dʐe      Z G d˄ d̐e      Z G d̈́ de      Z G dτ dАe      Z G dф dҐe      Z	 G dӄ dԐe      Z
 G dՄ d֐e      Z G dׄ dؐe      Z G dل dڐe      Z G dۄ dܐe      Z G d݄ dސe      Z G d߄ de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z e~d'4       G d d             Z G d de      Z e~d'4       G d de             Ze~ G d de             Z G d de      ZejV                   G d de«             Z G d d/e      Z G d de      Z e~d'4       G d de«             Zd2dZ  e~d'4       G d d e             Z! e~d'4       G d de             Z"	 	 	 	 d3dZ# e~d'4       G d de             Z$ G d de      Z% G d d	e«      Z&e~ G d
 de&             Z'e~ G d de&             Z( G d de      Z) G d de)      Z*d4dZ+d4dZ,y# e$ r dZd'ZY w xY w(5      )annotationsN)	GeneratorIterableSequence)AbstractContextManagernullcontext)Enum)partial)	AnyCallableClassVarLiteralOptionaloverloadTYPE_CHECKINGTypeVarUnion)assert_neverNever	TypeAlias)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metrics)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)
&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvstatically_known_trueSymTypes
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reordering)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)Node)CUDATemplate)GraphLowering)IndentedBufferr   rc   TF_T_U_V_IntLike_NumLikez  prefix	TensorBoxr   IRNode_NodeOrNodesc                .    t        | t        t        f      S N)
isinstanceintr   xs    e/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/ir.py
_is_staticrw      s    a#w((    )frozenc                  J    e Zd ZU ded<   ded<   ded<   ded<   d	ed
<   ded<   y)GraphPartitionSignatureOrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraphz	list[str]constant_namesN__name__
__module____qualname____annotations__ rx   rv   r{   r{      s/     ,+ GF (' rx   r{   c                "    dfd |        y )Nc                   | y t        | t        t        f      r| D ]
  } |        y t        | t              r| j	                         D ]
  } |        y t        | t
        t        t        t        t        j                  j                  j                  t        t        t        t         f	      sJ dt#        |        d       y )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])rr   listtupledictvalues
ExpandViewDynamicScalarAssertScalarrm   sympylogicboolalgBooleanr   rs   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     rv   r   z%validate_ir.<locals>._check_tensorbox   s     =e}- ' &'t$ ' &' ! KK''//#)
  e%jk rx   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @rv   validate_irr      s    < ]#rx   c                8     t         t              sJ d fd}|S )Nc                 0     t        t              | i |S rq   )getattrr^   )argskwargsnames     rv   fnzops_wrapper.<locals>.fn  s    !wsD!42622rx   )r   objectr   r   r   r_   )rr   str)r   r   s   ` rv   ops_wrapperr      s    dC   3 Irx   c           
     b    t        t        | t        t        |                         dfd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w rq   lenrange)indexi	inv_orders     rv   reindexz inverse_reorder.<locals>.reindex
  s?    5zS^+++-23u:->?il#???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @rv   inverse_reorderr     s*    Sc%j 123I@ Nrx   c                     d fd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w rq   r   )r   r   r   s     rv   r   zsame_reorder.<locals>.reindex  s>    5zSZ''').s5z):;AeAh;;;r   r   r   )r   r   s   ` rv   same_reorderr     s    < Nrx   c                     d fd}|S )Nc                       |             S rq   r   )r   reindex1reindex2s    rv   r   z fuse_reindexing.<locals>.reindex  s    ((rx   )r   r   r   zSequence[_V]r   )r   r   r   s   `` rv   fuse_reindexingr     s    ) Nrx   c                2    |rt        |       S t        |       S rq   )r(   r'   )ru   unbacked_onlys     rv   get_free_symbolsr   #  s    $Q''Arx   )   r      r5   )   r   r   r   r5   c                <    |t        |       }|S t        ||       }|S )z1
    Convert strides to fill order (argsort)
    )rI   rJ   )seq	shape_env
sorted_idxs      rv   get_fill_orderr   .  s/     $+CL
  !C0
rx   c                    t        |       D ci c]  \  }}||
 }}}t        t        |             D cg c]  }||   	 }}|S c c}}w c c}w )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    )	enumerater   r   )r   idxposlookupr   
fill_orders         rv   stride_order2fill_orderr   <  sR     (1'7883c3h8F8%*3u:%67&)7J7 97s
   AAc                    t        | |      }t        t        |             D cg c]  }d }}t        |      D ]
  \  }}|||<    |S c c}w )z)
    Convert strides to stride order
    r   )r   r   r   r   )r   r   r   _outr   elems          rv   get_stride_orderr   H  sV     !/sI >JCHo
&1
&C
&Z( 4D	J 's   	Ac                     y rq   r   ru   guard_shapes     rv   ir_node_to_tensorr   U  s    KNrx   c                     y rq   r   r   s     rv   r   r   Y  s    LOrx   c                   | y |s%t         j                  j                  j                  }nt        }| j                         D cg c]
  } ||       }}t        |       r.| j                         j                  D cg c]
  } ||       }}nt        j                  |      }| j                         }| j                         }t        |      }t        |      }t         j                  j                  j                  j                         5  t!        j"                  ||||      j%                         }d d d        |S c c}w c c}w # 1 sw Y   S xY w)N)sizestridedtypedevice)r`   graphsizevars	size_hintr   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerN   r   suppress_guardstorchempty_stridedzero_)	ru   r   shape_fnsr   r   r   r   ts	            rv   r   r   ]  s    	y 77##--!".AHQK.D.Q'(||~'<'<=!(1+==2248KKME\\^F"4(D$V,F	
			#	#	3	3	5 fE&

%' 	
 H / > Hs   D<;E
(EEc                0    t        | t              r| sd gS | S rq   )rr   r   values    rv   may_convert_to_optionalr   y  s     %u vLrx   c                    t        | t              s| | S t        | t        j                        r| j                  S t        | t
        t        f      rt        | j                               S t        d|  dt	        |       j                   d       y )Nzget_device_type(: ))rr   r   r   r   r   rn   
OutputSpecget_device_typer   r   r   rt   s    rv   r   r     sq     !SQY	Au||	$vv	A
+	,q||~..#A3ba)9)9(:!<=rx   c                    t        |       }|dv rt        t        | d      dk(  ryy|t        |      x}yddlm} t        |t              sJ t        ||      S )N)cpucuda_backendtritonTFr5   )TritonScheduling)	r   r   r6   r:   codegen.tritonr  rr   r   
issubclass)ru   r   device_schedulingr  s       rv   	is_tritonr    sn    QF  6fXX./8;!:6!BBK0'...')9::rx   c                    t        |       dk(  S )Nr   )r   rt   s    rv   is_cpur
    s    1&&rx   c           	         t         t              r j                         yt         fdt	        t         j                               dz
        D              }t        j                  j                  j                   j                         d         dk(  xs= t        j                  j                  j                   j                         d         dk  }|xr |S )NFc              3     K   | ]D  }t         j                  j                  j                  j	                         |         z  d k(   F yw)r   N)r`   r   r   size_hint_or_throw
get_stride).0r   	alignmentru   s     rv   	<genexpr>z-is_aligned_realized_tensor.<locals>.<genexpr>  sD       
			,	,Q\\^A->	?)	KPQQs   A
Ar5   )rr   rn   maybe_get_strideallr   r   r  r`   r   r   r  r   )ru   r  aligned_stridesaligned_last_dims   ``  rv   is_aligned_realized_tensorr    s    a A$6$6$8$@ s1<<>*Q./ O 	
++ALLN2,>?1D 	F77..qzz|B/?@AE  //rx   c                   t        |      t        |       k(  rt        |       t        |      k(  sJ t        || |      D ]  \  }}}t        j                  j                  j                  |d      r2t        j                  j                  j                  ||      r]t        j                  j                  j                  |      t        j                  j                  j                  |      k(  r y y)zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    r5   FT)r   r   r`   r   r   statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         rv   significant_strides_equalr"    s     u:X&3x=CM+III5(H5 	R7700a8ww77
''""0048H8H8V8V9
 
 	 rx   c                D   t        |       s| S t        d t        || j                               D              r| S t	        || j                         | j                               s| S t        |       \  }}g |j                  }t        | j                               D ]8  \  }}t        j                  j                  j                  |d      s1||   ||<   : t        |j                  |j                  |j                   ||j"                        }t%        t'        ||            S )a  
    Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
    dimensions - size 0 or 1 - will be updated.

    If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
    c              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wrq   r`   r   r   r  r  r   r!  s      rv   r  z2try_match_insignificant_strides.<locals>.<genexpr>  s3      B 	
00R8   79r5   datalayout)r   r  r   r  r"  r   as_storage_and_layoutr   r   r`   r   r   r  FixedLayoutr   r   r   offsetrm   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           rv   try_match_insignificant_stridesr5    s    !(
 '6#4#4#67  $Wf.?.?.A6??CTU/7GZ%:$$%J&//+, '17700A6#AJJqM' J _'*EFFrx   c                    | j                   j                  d      d   }t        |j                        D cg c]  \  }}|	 c}}|j                  d<   ddlm}  ||        y c c}}w )Noutput)opr   user_visible_output_idxs)record_original_output_strides)r   
find_nodesr   r   metatorch._inductor.compile_fxr:  )gmoutput_noder   r   r:  s        rv   gm_original_output_stridesr@    sa    ((%%%215K#K$4$454Q4K/0 J"2&4s   A$c                    t               }| D ]>  }|t        |j                         d      z  }|t        |j                         d      z  }@ t	        |      S )NFr   )r0   r   r   r  r   )inputssym_varsinps      rv   get_symbolic_inputsrF    sW    !+H L$S\\^5II$S^^%5UKKL >rx   c                     e Zd ZU  e       Zded<    ej                  d      Zded<    ej                  d      Z	ded<    ej                  d      Z
d	ed
<   eej                  dDd              ZdEdZdFdZdGdZdHdZdIdZdJdZdKdLdZ	 dM	 	 	 	 	 	 	 dNdZdOdZdPdZdQdZdRdZdSdZdTdZdUdZdVdZdWdZe dXd       Z!dYdZ"dUdZ#dZd Z$d[d\d"Z%d]d#Z&d^d$Z'dUd%Z(d_d&Z)d`d'Z*dad(Z+dWd)Z,dbd*Z-dZd+Z.dUd,Z/d[dcd-Z0ddd.Z1dFd/Z2ded0Z3dFd1Z4	 df	 	 	 	 	 dgd2Z5dhd3Z6did4Z7	 df	 	 	 	 	 djd5Z8dkd6Z9dld7Z:dmd8Z;dnd9Z<	 df	 	 	 dod:Z=dZd;Z>dpd<Z?dUd=Z@dUd>ZAdqd?ZBdrd@ZCdbdAZDdrdBZEeFr
e dOdC       ZGy!y!)srn   zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     K   t         j                  }|| z  t         _        	 d  |t         _        y # |t         _        w xY wwrq   )rn   rH  )rK  olds     rv   current_originszIRNode.current_origins  s7      %%"%-	*&)F#cF#s   A2 A?Ac                2    t         j                  | ||       y rq   )r   __setattr__)selfattrr   s      rv   _post_init_setattrzIRNode._post_init_setattr  s     	4u-rx   c                    | j                  dt        | j                               | j                  dt        j                  rt        j                         nd        | j                  dd        y )NrK  rM  rO  )rW  r0   rH  r6   debug_ir_tracebackrM  format_stackrU  s    rv   __post_init__zIRNode.__post_init__  sV    	:d6K6K+LMV5N5N//1TX	
 	t4rx   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wrq   r   r  deps     rv   r  z(IRNode.get_read_names.<locals>.<genexpr>'       ?s#((?   r0   	get_readsr[  s    rv   get_read_nameszIRNode.get_read_names&      ?dnn.>???rx   c                    | j                   S rq   )rM  r[  s    rv   get_tracebackzIRNode.get_traceback)  s    ~~rx   c                    | j                   S rq   rO  r[  s    rv   get_origin_nodezIRNode.get_origin_node,      rx   c                     y rq   r   r[  s    rv   get_defining_opzIRNode.get_defining_op/      rx   c                X    dt        | dd       }|rt        |      dkD  r|d d  d}|gS )Nzorigins=rK   @   =   z...)r   r   )rU  shortenrK  s      rv   common_reprzIRNode.common_repr2  s@    WT9b9:;s7|b( "c*Gyrx   c                $   t        |      t        | j                  |            z   }t        t        t        |            }|r5t	        dj                  |            }t        |       j                   d| dS t        |       j                   d| dS )Nz,
z(
z
)(r   )r   rv  mapr   indentjoinr   r   )rU  linesru  	multiline	new_liness        rv   
str_helperzIRNode.str_helper9  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44rx   c                    | j                   S rq   r   r[  s    rv   r   zIRNode.get_dtypeD      zzrx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r   NotImplementedErrorr[  s    rv   maybe_get_dtypezIRNode.maybe_get_dtypeG  s&    	>>##" 		    	c                2    t        dt        |        d      )Nz#get_layout() is not implemented by !r  r   r[  s    rv   r   zIRNode.get_layoutM  s    !$GT
|ST"UVVrx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r   r  r[  s    rv   maybe_get_layoutzIRNode.maybe_get_layoutP  &    	??$$" 		r  c                "    | j                         S rq   )r   r[  s    rv   get_output_speczIRNode.get_output_specV  s      rx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r  r  r[  s    rv   maybe_get_output_speczIRNode.maybe_get_output_specY  s(    	''))" 		r  c                >    t        | j                         t              S )z4True for single tensor output (excludes MultiOutput))rr   r  Layoutr[  s    rv   has_tensor_outputzIRNode.has_tensor_output_  s    $446??rx   c                2    t        dt        |        d      )Nz!get_size() is not implemented by r  r  r[  s    rv   r   zIRNode.get_sizec  s    !$Ed4j\QR"STTrx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r   r  r[  s    rv   maybe_get_sizezIRNode.maybe_get_sizef  %    	==?"" 		r  c                "    | j                         S rq   r   r[  s    rv   r  zIRNode.shapel  s    }}rx   c                4    t        | j                               S rq   )r[   r   r[  s    rv   	get_numelzIRNode.get_numelp  s    T]]_--rx   c                    t         j                  j                  j                  t	        j
                  | j                         d            S Nr   r`   r   r   r-   r   Eqr  r[  s    rv   is_zero_elementszIRNode.is_zero_elementss  0    ww55ehht~~?OQR6STTrx   c                0    t        dt        |              )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r  r[  s    rv   realizezIRNode.realizev  s      "ODJ<"@AArx   Nc                0    t        dt        |              )Nzcodegen_reference NYI on r  rU  writers     rv   codegen_referencezIRNode.codegen_reference  s    !$=d4j\"JKKrx   c                     y rq   r   r[  s    rv   r   zIRNode.get_device  rp  rx   c                .    | j                         }|J |S rq   )r   rU  r   s     rv   get_device_or_errorzIRNode.get_device_or_error  s    "!!!rx   c                     yNFr   r[  s    rv   has_exceeded_max_readszIRNode.has_exceeded_max_reads      rx   c                >    t        t        |       j                        rq   r  r   r   r[  s    rv   make_loaderzIRNode.make_loader      !$t*"5"566rx   c                >    t        t        |       j                        rq   r  r[  s    rv   make_indexerzIRNode.make_indexer  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   r  zIRNode.get_stride  r  rx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r  r  r[  s    rv   r  zIRNode.maybe_get_stride  r  r  c                >    t        t        |       j                        rq   r  r[  s    rv   get_namezIRNode.get_name  r  rx   c                B    	 | j                         S # t        $ r Y y w xY wrq   )r  r  r[  s    rv   maybe_get_namezIRNode.maybe_get_name  r  r  c                v    	 | j                         t        j                  j                  v S # t        $ r Y yw xY wr  )r  r`   r   graph_inputsr  r[  s    rv   is_input_bufferzIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   ), 	88c                     yr  r   rU  	thresholds     rv   has_large_inner_fnzIRNode.has_large_inner_fn  r  rx   c                     y rq   r   rU  userss     rv   
mark_reusezIRNode.mark_reuse      rx   c                     y rq   r   r[  s    rv   realize_hintzIRNode.realize_hint  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   unwrap_viewzIRNode.unwrap_view  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   freeze_layoutzIRNode.freeze_layout  r  rx   c                >    t        t        |       j                        rq   r  rU  r   allow_paddings      rv   freeze_layout_with_stride_orderz&IRNode.freeze_layout_with_stride_order       "$t*"5"566rx   c                >    t        t        |       j                        rq   r  rU  r   s     rv   freeze_layout_with_fill_orderz$IRNode.freeze_layout_with_fill_order  r  rx   c                >    t        t        |       j                        rq   r  rU  r   s     rv   freeze_layout_with_same_orderz$IRNode.freeze_layout_with_same_order  r  rx   c                >    t        t        |       j                        rq   r  rU  exact_stridesr  s      rv    freeze_layout_with_exact_stridesz'IRNode.freeze_layout_with_exact_strides  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_read_writeszIRNode.get_read_writes  r  rx   c                6    | j                         j                  S rq   r  readsr[  s    rv   re  zIRNode.get_reads      ##%+++rx   c                4    t        | j                               S rq   )r   re  r[  s    rv   	num_readszIRNode.num_reads  s    4>>#$$rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_storage_numelzIRNode.get_storage_numel  r  rx   c                >    t        t        |       j                        rq   r  rU  r   s     rv   get_free_symbol_useszIRNode.get_free_symbol_uses  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_reduction_typezIRNode.get_reduction_type  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_reduction_sizezIRNode.get_reduction_size  r  rx   c                     yr  r   r[  s    rv   	is_externzIRNode.is_extern  r  rx   c                     yr  r   r[  s    rv   is_no_opzIRNode.is_no_op  r  rx   c                >    t        t        |       j                        rq   r  r  s     rv   constant_to_devicezIRNode.constant_to_device  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_mutation_nameszIRNode.get_mutation_names  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_operation_namezIRNode.get_operation_name  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   get_inputs_that_alias_outputz#IRNode.get_inputs_that_alias_output  r  rx   c                     y rq   r   r[  s    rv   r   zIRNode.dtype  s    (+rx   )rK  zOrderedSet[Node]r   zGenerator[None, None, None])rV  r   r   r   r   r   r   r   r   OrderedSet[str])r   rL  r   rN  r   zOptional[Operation]T)ru  r   r   Sequence[str])TT)r|  zSequence[object]ru  r   r}  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r   )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r   r   Optional[str]rq   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   r   r  rs   r   r   r   rn   Fr   	list[int]r  r   r   r   r   r  r   r   r   list[_IntLike]r   r   r  r"  r  r   r   r   r   zdependencies.ReadWritesr   zOrderedSet[Dep]r   rs   r   ri   r   r   r   r|   r   Sequence[sympy.Expr]r   r  r   rn   r   r  )Hr   r   r   r0   rH  r   dataclassesfieldrK  rM  rO  staticmethod
contextlibcontextmanagerrR  rW  r\  rf  ri  rl  ro  rv  r  r   r  r   r  r  r  r  r   r  propertyr  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  re  r  r  r  r  r  r  r  r  r  r  r  r   r   r   rx   rv   rn   rn     s-   2<,/>  1{00e<G_<%6[%6%6E%BI"B+<;+<+<%+HK(H*  *.5@  PT	5%	504	5HL	5		5W!@U  .UB$L
777777 7<77/37	7
77 DI7+7<@7	7
7,%7 %*7!7	!7
777777 	+ 
+ rx   c                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZ	 d	 	 	 ddZddZy) 	Operationc                    d | _         y rq   operation_namer[  s    rv   r\  zOperation.__post_init__  s
    -1rx   c                    t         rq   r  r[  s    rv   r   zOperation.get_device      !!rx   c                6    t        | d      sJ | j                  S NrO  )hasattrrO  r[  s    rv   rl  zOperation.get_origin_node
  s    t]+++rx   c                6    t        | d      sJ | j                  S )NrK  )r=  rK  r[  s    rv   get_originszOperation.get_origins  s    tY'''||rx   c                6    | j                   J | j                   S rq   r6  r[  s    rv   r  zOperation.get_operation_name  s     ""..."""rx   c                     yr  r   r[  s    rv   r  zOperation.is_extern  r  rx   c                     yr  r   r[  s    rv   r  zOperation.is_no_op  r  rx   c                    t         rq   r9  r[  s    rv   r  zOperation.get_read_writes  r:  rx   c                &    || j                         v S rq   )rf  )rU  r   s     rv   
is_user_ofzOperation.is_user_of  s    t**,,,rx   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wrq   r_  r`  s     rv   r  z+Operation.get_read_names.<locals>.<genexpr>#  rb  rc  rd  r[  s    rv   rf  zOperation.get_read_names"  rg  rx   c                6    | j                         j                  S rq   r  r[  s    rv   re  zOperation.get_reads%  r  rx   c                    t         rq   r9  r[  s    rv   get_outputszOperation.get_outputs(  r:  rx   c                    t               S rq   r/   r[  s    rv   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs+  
    |rx   c                    t               S )a  
        When unbacked_only=True:
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.

        When unbacked_only=False:
        Similar to `unbacked_only=True` but including all free symbols
        instead of only free unbacked symbols.
        r/   r  s     rv   r  zOperation.get_free_symbol_uses.  s    , |rx   c                     y)z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   r   r[  s    rv   get_workspace_sizezOperation.get_workspace_sizeF  s    
 rx   Nr  r  r  )r   rJ  r  r  r$  )r   r   r   r   r  r%  r   list[Buffer]r   r|   r  r(  r&  )r   r   r   r\  r   rl  r?  r  r  r  r  rE  rf  re  rJ  rL  r  rP  r   rx   rv   r4  r4    sc    2" #"-@," %*!	!0rx   r4  c                  <    e Zd ZU ded<   ded<   ded<   ded<   	 d	 	 	 dd	Zd d
Zd! fdZd"dZeZd#dZ	d$dZ
d%dZd%dZed&d       Zeej"                  fd'd       Zed(d       Zd)dZed"d       Zd*d+dZdd,dZd-dZd.dZd/dZd0dZd1dZd2dZ xZS )3Loopsr  r   r  r   Callable[..., Any]inner_fnr  rangesc                     t               j                  g fd| j                  D        | j                         S )Nc              3  6   K   | ]  }t        |        y wrq   r   r  er   s     rv   r  z-Loops.get_free_symbol_uses.<locals>.<genexpr>Y  s     FQq-0F   )r0   unionrX  inner_fn_free_symbolsr  s    `rv   r  zLoops.get_free_symbol_usesU  s@     "z|!! 
F$++F
&&}5
 	
rx   c                   | j                  d| j                  j                   dt        | j                        | j                         g|D cg c]  }| dt        | |        c}z   d| j                  gz         S c c}w )N'=origin_node=)r  r   r   r   r   inner_fn_strr   rO  )rU  namesr   s      rv   _to_strzLoops._to_str]  s    DKK$$%Q'DJJ!!#
 <AA4$qt,-.AB d..1234
 	
 Bs   A?
c                "    t         |           y rq   )superr\  rU  	__class__s    rv   r\  zLoops.__post_init__h  s    rx   c                $    | j                  d      S )NrX  rg  r[  s    rv   __str__zLoops.__str__k  s    ||K((rx   c                    | j                   S rq   r   r[  s    rv   r   zLoops.get_devicep      {{rx   c                    | j                   S rq   rk  r[  s    rv   rl  zLoops.get_origin_nodes  rm  rx   c                    | j                   S rq   rm  r[  s    rv   r   zLoops.get_sizev  rr  rx   c                    | j                   S rq   rm  r[  s    rv   get_pointwise_sizezLoops.get_pointwise_sizey  rr  rx   c                    |j                  dd       }|j                  dd       } | |i |}|j                  d|       |j                  d|xs |j                         t        j	                  |      S )NrO  rM  )poprW  rM  rm   create)clsr   r   rO  tbrs         rv   ry  zLoops.create|  sm    jj5ZZT*   	
]K8	["*;<""rx   c                    t        |       D cg c]0  \  }}|dk(  rt        j                  j                  nt	        ||      2 c}}S c c}}w Nr5   )r   r   SZerorZ   )rX  rl   nr   s       rv   _indexzLoops._index  sH     "&)
1 FEGGLL(Fvq(QQ
 	
 
s   5Ac                `   t        t        j                               }t        j                  |      5  t	        j
                  t        dd      5   | j                  | j                           |j                         cd d d        cd d d        S # 1 sw Y   nxY wd d d        y # 1 sw Y   y xY wNallow_indexingT)
rB   r`   MockHandlerset_ops_handlerr   r   r   rW  inner_fn_argsgetvalue)rU  	opcounters     rv   inner_fn_opcountzLoops.inner_fn_opcount  s     1	i(	(LL)94@	( DMM4--/0%%'	( 	( 	( 	( 	( 	( 	(s#   B$-B<	B$B	B$$B-c                :    | j                  | j                        fS rq   )r  rX  r[  s    rv   r  zLoops.inner_fn_args  s    DKK(**rx   c                r    t        j                  j                  | j                  g| j	                          S rq   )r`   KernelFormatterHandlerir_to_stringrW  r  r[  s    rv   re  zLoops.inner_fn_str  s3    ''44MM
 ..0
 	
rx   c                x    |d}t        |t        j                        }| j                         j                  |kD  S r  )maxr6   realize_opcount_thresholdr  num_opsr  s     rv   r  zLoops.has_large_inner_fn  s9    I	6#C#CD	$$&..::rx   c                h    | j                  | j                        }t        | j                  ||      S NrB  )r  rX  r=   rW  )rU  r   r   s      rv   r`  zLoops.inner_fn_free_symbols  s'    DKK(#DMM5VVrx   c                |   t        j                  t        dd      5  | j                         rJt	        | j                         | j                         | j                               j                  cd d d        S t	        | j                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )	r   r   r   r  r?   r  r   r  r  r[  s    rv   re  zLoops.get_reads  s    \\.*:DA 	&&(*$$&MMO++- %	 	 +$$&MMO %	 	 	s   AB271B22B;c                H    t        | j                         j                        S rq   )r0   r  read_buffersr[  s    rv   rf  zLoops.get_read_names  s    $//1>>??rx   c                H    t        | j                         j                        S rq   )r   r  r  r[  s    rv   r  zLoops.num_reads  s    4((*7788rx   c                2    t        dt        |        d      )Nz+get_reduction_size() is not implemented by r  r  r[  s    rv   r  zLoops.get_reduction_size      !9$t*QG
 	
rx   c                2    t        dt        |        d      )Nz+get_reduction_type() is not implemented by r  r  r[  s    rv   r  zLoops.get_reduction_type  r  rx   c                2    t        dt        |        d      )Nz+constant_to_device() is not implemented by r  r  r  s     rv   r  zLoops.constant_to_device  r  rx   r  r(  )rf  r  r   r   r  r  r  r  r  )r   r   r   r   r   rm   )rX  r  rl   r4   r   r  )r   rC   r   zSequence[Sequence[_IntLike]]rq   r  r   r   r   OrderedSet[Symbol]r%  r  r&  r)  r  r+  ) r   r   r   r   r  rg  r\  ro  __repr__r   rl  r   rv  classmethodry  r/  r4   INDEXr  rK   r  r  re  r  r`  re  rf  r  r  r  r  __classcell__rk  s   @rv   rU  rU  N  s       %*
!
	!
	
 ) H  	# 	# :>** 
 
 ( (+ 
 

;W@9




rx   rU  c                   |j                   rt        j                  t        d      |      S t        j                  d|      S )Nnanr   )is_floating_pointr^   constantfloat)r   r   s     rv   nop_loader_fnr    s1    ||E%L%00||Au%%rx   c                  D    e Zd ZddZddZd	dZ	 	 	 	 	 	 	 	 d
dZddZy)	Pointwisec                p    | j                         rt        t        | j                        S | j                  S Nr  )r  r
   r  r   rW  r[  s    rv   r  zPointwise.make_loader  s)      "=

;;}}rx   c                    g S rq   r   r[  s    rv   r  zPointwise.get_reduction_size  s    	rx   c                     y rq   r   r[  s    rv   r  zPointwise.get_reduction_type  rp  rx   c                p    | j                         }t        j                  |xs d ||       ||            S Nunnamed)r  r^   storerU  output_nameindexervarsloaders        rv   store_outputzPointwise.store_output  s2     !!#yy1	74=&,OOrx   c                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                        S FMove this to a given device. Requires that all reads are to constants.override_devicer   r   rW  rX  )r  r   r   ConstantBufferr  r   rX  rU  r   r  s      rv   r  zPointwise.constant_to_device  sH    !!#Hn.?HPfT[[
 	
rx   Nr  r)  r  r  r  r  !Callable[[Sequence[Expr]], Never]r  r  r   r   r+  )r   r   r   r  r  r  r  r  r   rx   rv   r  r    sF    P"P 3P 	P
 
P
rx   r  c                  F    e Zd ZU ded<   dZded<   ddZ	 	 	 	 	 	 	 	 d	dZy)
Scatterr  output_indexerNrE   scatter_modec                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                        S )r  r  )r   r   rW  rX  r  r  )	r  r   r   r  r  r   rX  r  r  r  s      rv   r  zScatter.constant_to_device  s]    !!#Hn.?HP**;;..**
 	
rx   c                    | j                         }|d}t        j                  | || j                  |             ||      | j                        S )Nr  )mode)r  r^   r  r  r  r  s        rv   r  zScatter.store_output
  sT     !!##KyyD''-.4L""	
 	
rx   r+  r  )r   r   r   r   r  r  r  r   rx   rv   r  r    sB    44"L)"

"
 3
 	

 

rx   r  
logical_ormaximumminimummuladdbitwise_xor)anyr  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                      t         v r	t             S  dv r	 	 	 	 	 	 d fd}|S  dk(  r	 	 	 	 	 	 dd}|S t        d        )Nargmaxargminc                   | \  }}|\  }}dk(  rt        j                  ||      }nt        j                  ||      }t        j                  ||      }t	              rt        j
                  ||      }t        j
                  ||      }	t        j                  |t        j                  ||	            }t        j                  |t        j                  ||	            }rt        j                  ||      nt        j                  ||      }
t        j                  |t        j                  ||
            }t        j                  |||      t        j                  |||      fS )Nr  )	r^   ltgteqr!   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              rv   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn-  s     !GW GW)vvgw/vvgw/FF7G,Ee$&&'2&&'2~~dCFF7G,DEucoogw.OP ' w(VVGW- 
 >>$s(CDD		$1		$1 rx   welford_combinec                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS rq   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              rv   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fnN  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB rx   zunknown reduction_type=)r  tuple[object, object]r  r  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )r  r  )r  r   r  r  r  s   ```  rv   get_reduction_combine_fnr   %  s     --#N33	/	/	$	)>	&	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMrx   c                      e Zd ZU ded<   ded<   ded<   ded<   dd	ZeZd d! fd
Zd"dZd#dZ	 	 	 	 	 	 	 	 	 	 d$dZ	d%dZ
d&dZd d!dZd'dZe	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d)d       Ze	 	 	 	 	 	 	 	 	 	 d*d       Zeej&                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d+d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 	 	 d-d       Ze	 	 	 	 	 	 d.d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d/d       Ze	 	 	 	 	 	 	 	 	 	 	 	 d0d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d1d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d       Z xZS )4	Reductionr  reduction_rangesrD   r  r  	src_dtyperH   reduction_hintc                $    | j                  d      S )N)rX  r  r  rn  r[  s    rv   ro  zReduction.__str__l  s    ||LMMrx   c                z    t         |          t               j                  fd| j                  D         z  S )Nc              3  6   K   | ]  }t        |        y wrq   r[  r\  s     rv   r  z1Reduction.get_free_symbol_uses.<locals>.<genexpr>s  s     PQq-0Pr^  )ri  r  r0   r_  r  rU  r   rk  s    `rv   r  zReduction.get_free_symbol_usesq  s:    w+M:=OZ\=O=OP$:O:OP>
 
 	
rx   c                    | j                   S rq   )r  r[  s    rv   r  zReduction.get_reduction_sizev  s    $$$rx   c                    | j                   S rq   )r  r[  s    rv   r  zReduction.get_reduction_typey      """rx   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }t        j                  |xs d ||      |      S r  )r^   	reductionr   r  r  rW  store_reduction)rU  r  r  r  reduction_varsr   s         rv   r  zReduction.store_reduction|  sW     JJNNMM$/	
 "";#;)WT]ERRrx   c                X    t        | j                        t        | j                        z   S rq   )r   rX  r  r[  s    rv   index_lengthzReduction.index_length  s!    4;;#d&;&;"<<<rx   c                    | j                  | j                        }| j                  | j                  t        j                        }||fS rq   )r  rX  r  r4   R0_INDEX)rU  r   rindexs      rv   r  zReduction.inner_fn_args  s8    DKK(T22DMMBvrx   c                    | j                  | j                        }| j                  | j                  t        j                        }t        | j                  |||      S r  )r  rX  r  r4   r  r=   rW  )rU  r   r   r  s       rv   r`  zReduction.inner_fn_free_symbols  sH    DKK(T22DMMB#MM5&
 	
rx   c           
     
   | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                  | j                  t        j                        S )r  r  r   r   rW  rX  r  r  r  r  )r  r   r   r  r  r   rX  r  r  r  rH   DEFAULTr  s      rv   r  zReduction.constant_to_device  sm    !!#Hn.?HP**;;!22..nn(00	
 		
rx   Nc	           
        t         j                  j                  j                  |      }	t         j                  j                  j                  t	        |            }
|dk(  xsG t         j                  j                  | t        j                         xr |dvxr t        j                  }t        |	      rt        |
      st        j                  dfS t        j                  |       }|j                  }d}|rat!        j"                  t         j$                  j&                  | d      }t!        j"                  t         j$                  j&                  | d      }n	 	 	 	 	 	 dd}|}|
dk(  r ||	|
      }|dk(  rt        j(                  |fS |t+        |t,              rt/        j0                  t2        d	d      5  t5        |      \  }}d d d        hft         j                  j                  j                  t	        ||z               }|	|k(  r,t6        j9                  d
|||||       t        j(                  dfS t        j(                  |fS |	|k  s|
|dz  dz  k\  rt        j                  dfS t;        | |||||dk7  r|nd|t        j                        }dd} ||      \  }}|r ||      \  }}t=        |      dk(  rt        j                  dfS t?        j@                  |jC                         |jE                               \  \  }}}d}d}|D ]  }t         j                  j                  jG                  ||      }t         j                  j                  jI                  ||tK        |jM                                     } tO        d | D              }!|!r|dz  }|dz  } ||kD  rt        j(                   ||	|
      fS t        jP                   ||	|
      fS # 1 sw Y   xY w)Nscanr  r5       T)inner_reductionFc                     yr~  r   )reduction_numel_hint
numel_hints     rv   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splits  s     rx   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr  r   r  r  c           	     &   t        d t        | j                         | j                         | j	                               |       }|j                         }|j                  J |j                  D  cg c].  } t        | t              rt        | t        j                        s| 0 }} g }d}t        |j                  d       D ]  t        fd|D              s|j                  j                         j                   t"        j$                  j&                  v sZt"        j$                  j&                  j                      }t)        |j*                  dd       }|j-                          t)        |j*                  dd       |k7  sd} ||fS c c} w )	Nr   r   r   r   r*  r)  Fc                    | j                   S rq   r_  rt   s    rv   <lambda>z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s
    aff rx   keyc              3  N   K   | ]  }|j                   j                  v   y wrq   )r   r'   )r  r|  mds     rv   r  zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>   s      FaqBHH111Fs   "%r   T)ComputedBufferr   r   r   r   r  
range_varsrr   r   r   Numbersortedr  r  appendr   r   r`   r   name_to_bufferr   r*  decide_layout)	r|  cbread_writesr,  indiceschangedbuforiginal_strider*  s	           @rv   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indices	  sW   %<<>++-
 B ,,.K ))555 %//a&z!U\\/J J 
 GG[..4DE +F:FFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G+ G##!s   23Fr   c              3  &   K   | ]	  }|d kD    ywr5   Nr   r  r   s     rv   r  z'Reduction.num_splits.<locals>.<genexpr><  s     /!A/   )r  rs   r   rs   r   rs   )r|  r  r   ztuple[Sequence[Expr], bool]))r`   r   r   r  r[   has_featurer8   REDUCE_TO_SINGLE_ELEMENTr6   split_reductionsrw   rH   r  rG   ry  multi_processor_count	functoolsr
   choicesreduction_split_factorINNERrr   rm   r   r   r   r>   logdebugr  r   r7   index_vars_squeezer   r  simplify_with_rangesstride_hintsr   keysr  OUTER)"r   	dst_dtyper  rW  rX  r  r  reduction_numel
input_noder  r   should_splitpropsnum_smmin_elements_per_threadr!  outer_reduction_splitssplit
new_rangesnew_reduction_rangesextracted_numel_hintr|  r8  r4  r5  r   r  ranges1	num_outer	num_innerr   jr0  outers"                                     rv   
num_splitszReduction.num_splits  s     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /0Z
5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K\\.2BDI H <JG",H
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- $;;VaZ"_, ((!++--;v-E>5(00	
	$B ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		 		A  55aAAgg&&33>4#7G /w//EQ	Q			 y  &&(>$j)   !&&(>$j)  MH Hs   6OOc                @    D cg c]+  }t         j                  j                  j                  |      - c}t	        ||      dfd|dv rAt        ddt        j                              j                         	 	 	 	 	 	 d fdfdS  S c c}w )z1Convert inner_fn from a reduction to an pointwisec                     t        j                   fdt        j                  D cg c]  }t	        |       c} D              S c c}w )Nc              3  0   K   | ]  } |        y wrq   r   )r  r  r   value_fns     rv   r  z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>[  s        UF+   )rA  reduce	itertoolsproductr   )r   ru   
combine_fnr  ra  s   ` rv   r   z*Reduction._unroll_reduction_fn.<locals>.fnX  sH    ##"+"3"3,<=q%(=# 
 >s   A
r  r  Nc                    |D cg c]  }t        j                  |       }} | |      t        j                   |      t        j
                        fS c c}w rq   )r   expandr^   
index_exprr   int64)r   r  r   flatten_indexrW  s      rv   ra  z0Reduction._unroll_reduction_fn.<locals>.value_fnl  sP     4::a%,,q/::UF+NN=#8%++F  ;s   Ac                     |       d   S r~  r   )r   r   s    rv   r&  z0Reduction._unroll_reduction_fn.<locals>.<lambda>u  s    E1 rx   )r   r  r   r   )r   r  r  r  r   r  )	r`   r   r   evaluate_static_shaper   r,  r   r   r  )	rW  r  r  r  ru   rf  rl  r   ra  s	   ``   @@@@rv   _unroll_reduction_fnzReduction._unroll_reduction_fnJ  s     @P
:;AGG2215
 .niH
		 11' 112BC	
 ln )3E* .-HIM
s   0Bc
                \   t         j                  j                  j                  t	                    dk(  rkdfd}
 |
d       |
d       |
d       |
d      dj                         v s
J  d       dfd}t        j                  |||t        |            S dk(  r+dv rdfd	}ndfd
}t        j                  |||      S t        t              rt         j                  j                  j                        t        j                  k  rNt	        |      dk7  st        |j                        r+t        j                  || j!                  |      |      S | j#                  ||||		      \  }}dfd} ||      }|t$        j&                  k(  r|}|dk(  r4|	J t)        |	      \  }}|J |J | j+                  ||||||
      S |dkD  r| j-                  ||||||	
      S t.        j                  t1        ||||            S )Nr   c                    t         j                  k(  rt        |       S j                  r't        | t        j
                        sJ t        |       S t        | t        j                        sJ t        |       S rq   )	r   r   r  rr   typingSupportsFloatr  SupportsIntrs   )valrL  s    rv   py_cnstz!Reduction.create.<locals>.py_cnst  sa    

*9$00%c6+?+?@@@ :%%c6+=+=>>>s8Orx   r5   )r  r  r  r  z* not supported for zero-dimension tensors!c                6    t        j                           S rq   r^   r  )r   rL  r  rtypes_to_initss    rv   const_fnz"Reduction.create.<locals>.const_fn  s    ||ON$CYOOrx   r  rg  c                0    t        j                  d      S r  rx  )r   rL  s    rv   r   zReduction.create.<locals>.fn  s    <<955rx   c                n    D cg c]  }t         j                  j                   }} | |      S c c}w rq   r   r  r  )r   r   reduction_indexrW  r  s      rv   r   zReduction.create.<locals>.fn  s1    =M&Nuww||&NO&N#E?;; 'O   !2c                `    t              r| S | dkD  rt        | t        j                        S | S r~  )rw   r  r6   min_num_split)rT  rM  s    rv   _maybe_increase_splitz/Reduction.create.<locals>._maybe_increase_split  s/    /*qy5&"6"677rx   r  r  )ru  r   r   zUnion[bool, float, int])r   rs   r   r_   )rT  rs   r   rs   )r`   r   r   simplifyr[   rJ  r  ry  r   rr   r   r  r6   unroll_reductions_thresholdrW   r   ro  r]  rH   r  r>   !create_multilayer_existing_rangescreate_multilayerrm   r  )rz  r   rL  r  rW  rX  r  r  r  rN  rv  rz  r   hintrT  r  rU  rV  rM  ry  s     ` ` ``          @@rv   ry  zReduction.createz  s    ''**33MBR4STa$ qz"1:
qz	O "_%9%9%;; !""LM;P ##!F|	 $   a!556
< ##YF $  
 0  33OD001v&!+vfkk/B ##11.	  $   nn

e	 &e,
 ]222!NB;)))/R0,J, )))'33388 $  QY((   !!1-#-	
 	
rx   c           	        | dv rAt        |      rt        d      S t        |      ryt        j                  |      j
                  S | dv rAt        |      rt        d      S t        |      ryt        j                  |      j                  S t        |      rdnd}t        |      rdnd}|||||||f|||ft        d      |fd	|    S )
N)r  r  z-infF)r  r  infTr   r5   )r  r  r  r  welford_reducer  online_softmax_reduce)r!   r  r    r   iinfor  r  )r  r   zeroones       rv   default_accumulatorzReduction.default_accumulator  s     ..e$V}$!%({{5)---..e$U|#!%({{5)---(/uQ&u-d1#T40 $dD1&+FmT%:
  	rx   c                :    | dk(  ryt         j                  | |      S )Nr  r   )r  r  r  r   s     rv   default_valuezReduction.default_value:  s#     --,,^UCCrx   c                    | dk(  r|S | dk  r(|dk  r#|t         j                  k(  rt         j                  S | dk  r(|dk  r#|t         j                  k(  rt         j                  S |S )Nr     i      )rH   rK  
OUTER_TINY)rT  r   r  s      rv   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hintB  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++rx   c                z   |yt         j                  j                  j                  |j	                         |      sy|j                          	 t        |       |j                         }t        |dd       D ]3  \  }}t         j                  j                  j                  |d      s1|c S  y# t        $ r Y yw xY w)z
        If we are reducing over the full tensor, and it is non-dense in the last dimension,
        reindex so we reduce over the dense dimension. initially just handle complete
        reduction case
        Nr  r5   )
r`   r   r   r  r  r  r+  r  r  r   )rz  rM  rN  r0  r   r   s         rv   $check_for_split_dense_dim_reindexingz.Reduction.check_for_split_dense_dim_reindexingS  s     ww77  "O
 	!*- '')gcrl+ 	DAqww771=	  # 		s   B. .	B:9B:c                
  
 | j                  |      }t        j                  |g|      t        j                  j
                  j                  t        j                  |z  d             
	 	 	 	 	 	 d
fd}	|	S )Nr   c                   |\  }| ^ }|z  |z   d
fd}r`t              }t        j                  t        j                  |      t        j                  |            }t        j                  ||	      S  |       S )Nc                 $       g            S rq   r   )r4  r  	new_indexr   s   rv   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<rx   )r   r_   )rQ   r^   r  rj  masked)r   r~  reduction_blockr  index_dtyper  r4  r  
block_sizedefaultr  	need_maskrM  r   s         @@rv   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vrx   )r   Sequence[Symbol]r~  r  r   r_   )	r  Viewdynamic_reshape_indexerr`   r   r   r-   r   r  )rz  r  r  rM  rT  r  r  rN  dense_indexr  r  r   s    ` ` ``   @@rv   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loaders  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( rx   c                    t        d D              s
J d       t        j                  |t        |      t        |      z         	 	 	 	 	 	 dfd}|S )Nc              3  &   K   | ]	  }|d k(    ywr:  r   r  r|  s     rv   r  zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s     3a163r<  z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t               }| t              d  } | t        |      t        |      z               S rq   )r   r   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       rv   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF rx   )r  r*  r  r*  r   r_   )r  r  r  r   )rz  r  r  original_reduction_rangesrU  rV  r  r   s    ``    @rv   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges  sy     3?33 	
G6HI	
3 ..%uZ'85AU;V'V
		.		!5		 		 rx   c                   |t         j                  t         j                  fvr|nt         j                  }t        j                  |||||||	|      }|j                          |j                         	 	 	 	 	 	 dfd}t        j                  j                  j                  t        |            }| j                  |
||      }||dt        |       k(  sJ t        j                  t	        |||||t        |      d |	||            S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                     g | |      S rq   r   )r   r~  intermediate_loaders     rv   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fn  s     ''A'A'ABBrx   Nr  )r   r  r~  r  r   r_   )r   float16bfloat16r  r  ry  r  r  r`   r   r   r   r[   r  r   rm   )rz  r   rL  r  r  r  r  rU  rV  r  rT  r  intermediate_dtypeintermediater  r   r  s                   @rv   create_multilayer_helperz"Reduction.create_multilayer_helper  s(   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99:~
 *-Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
rx   c                    t        |      }t        ||dz
  z   |      }| j                  ||      }| j                  |||||||
      }| j	                  ||||||g |||g|||	      S )r  r5   )r[   r2   r  r  r  )rz  r   rL  r  rW  rX  r  r  rT  r  rN  rM  r  r  r  s                  rv   r  zReduction.create_multilayer  s    & ((89o;UC
##NI>00

 ++feL
 	
rx   c                j    | j                  |||||      }| j                  ||||||g ||||	d|
      S )r  r  )r  r  )rz  r   rL  r  rW  r  r  rU  rV  r  r  r  s               rv   r  z+Reduction.create_multilayer_existing_ranges)  sc    $ @@% 

 ++%+o+
+ 
 	
rx   r  r  r  r)  r  
r  r  r  r  r  r  r  r  r   r   r&  r   zSequence[Sequence[Expr]]r+  rq   )r   r  rL  r  r  r  rW  Callable[..., OpsValue]rX  r  r  r  r  z%Union[ReductionType, Literal['scan']]rM  r   rN  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
rW  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   r  rL  r  r  r  rW  rV  rX  r  r  r  r  rD   r  rH   rN  r  r   rm   r  r   r   r  r   #Union[_NumLike, Sequence[_NumLike]])rT  ri   r   rs   r  rH   r   rH   )rM  ri   rN  r  r   r  )r  r  r  r  rM  ri   rT  ri   r  ri   r  r  rN  r  r   Callable[..., object])r  @Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue]r  r  r  r  rU  Sequence[Integer]rV  r  r   r  )r   r  rL  r  r  r  r  rV  r  r  r  r  rU  
list[Expr]rV  list[Integer]r  rD   rT  ri   r  rH   r   rm   )r   r  rL  r  r  r  rW  rV  rX  r  r  r  r  rD   rT  ri   r  rH   rN  r  r   rm   )r   r  rL  r  r  r  rW  rV  r  r  r  r  rU  r  rV  r  r  rD   r  rH   r   rm   )r   r   r   r   ro  r  r  r  r  r  r  r  r`  r  r/  r]  ro  r  rH   r  ry  r  r  r  r  r  r  r  r  r  r  r  s   @rv   r  r  d  s   ((!!!!N H

%#S"S 3S 	S
 )S 
S=


  (,^^^ ^ *	^
 #^ -^ >^ ^ %^ 
(^ ^@ -N-,- - 	-
 
2- -^  )6(=(='+^
^
 ^
 	^

 %^
 ^
 )^
 &^
 &^
 %^
 
^
 ^
@ $/	, < DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T P ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
rx   r  c                  l     e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ xZS )MultiOutputReductionrs   output_indexc
           
         t              rft              dk(  rd   }
n	 	 	 	 	 	 dfd}
t        |   |||
|||||       |	| _        y )Nr5   r   c                2     t         fdD              S )Nc              3  0   K   | ]  } |        y wrq   r   )r  r   r   reduction_idxs     rv   r  z@MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>n  s     HR]3Hrb  )r   )r   r  	inner_fnss   ``rv   r  z-MultiOutputReduction.__init__.<locals>.loaderk  s     HiHHHrx   r  )r   r  r  r  r   ztuple[OpsValue, ...])callabler   ri  __init__r  )rU  r   rL  r  rX  r  r  r  r  r  r  rk  s      `       rv   r  zMultiOutputReduction.__init__W  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )rx   c           	     <   t        j                  | j                  | j                  | j                  | j                  ||            }t        |t        t        f      sJ t        |              || j                     }t        j                  |xs d ||      |      S r  )r^   r  r   r  r  rW  rr   r   r   r   r  r  )rU  r  r  r  r  r   r   s          rv   r  z$MultiOutputReduction.store_reduction|  s     JJNNMM$/	
 &5$-0CT&\NC0t(()"";#;)WT]ERRrx   )r   r  rL  r  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]rX  r  r  r  r  rD   r  r  r  rH   r  rs   r  )r   r   r   r   r  r  r  r  s   @rv   r  r  T  s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
Srx   r  c                  ^    e Zd Zeej
                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)OnlineSoftmaxReductionNc
           	         t        fdt        |      D              }
|
D ]  }|j                           |
S )z>
        Create the reduction disregarding splitting.
        c              3  j   K   | ]*  }t         j                  t        d |	             , yw)r  N)rm   ry  r  )	r  
output_idxr   rL  rW  rX  r  r  r  s	     rv   r  z0OnlineSoftmaxReduction.create.<locals>.<genexpr>  sI      
  $$+"

s   03)r   r   r  )rz  r   rL  r  rW  rX  r  
num_outputr  rN  resultsr   s    `````` `   rv   ry  zOnlineSoftmaxReduction.create  sG       
 
 $J/
 
   	AIIK	rx   )r   r  rL  r  r  r  rW  rV  rX  r  r  r  r  rs   r  rH   rN  r  r   Sequence[TensorBox])r   r   r   r  rH   r  ry  r   rx   rv   r  r    s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
! !rx   r  c                      e Zd Zeej
                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z	y)WelfordReductionc                   |dv sJ t         j                  j                  j                  t	                    }d
fd}	|dk(  r |	d      }
 |	d      } |	d      }|
||fS |dk(  r@	 	 	 	 dfd|dk(  r |d          |	d       |	d      fS t        fd|D              S t        j                  |d   ||      \  }}|t        j                  k(  r|}|dkD  r| j                  ||||      S t        d	      D cg c](  }t        j                  t        ||||	            * }}|D ]  }|j                           |S c c}w )N)r  r  c                X     d fd}t         j                  |t                    S )Nc                0    t        j                        S rq   rx  )r   r   ru  s    rv   rW  z8WelfordReduction.create.<locals>.const.<locals>.inner_fn  s    || rx   r  r   r  r   r_   r  ry  r   )ru  rW  r   r   rX  s   ` rv   constz&WelfordReduction.create.<locals>.const  s2     ##!F|	 $  rx   r   r5   c                X     d fd}t         j                  |t                    S )Nc                n    D cg c]  }t         j                  j                   }} | |      S c c}w rq   r}  )r   r   r~  r  r  s      rv   rW  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s1    =M&Nuww||&NO&N!#77 'Or  r  r  r  )r  rW  r   r   rX  r  s   ` rv   copyz%WelfordReduction.create.<locals>.copy  s2    8 !''!%<	 (  rx   r  c              3  .   K   | ]  } |        y wrq   r   )r  r   r  s     rv   r  z*WelfordReduction.create.<locals>.<genexpr>  s     :"T"X:   )r  rM  r   )ru  rs   r   rm   )r  z4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r   rm   )r`   r   r   r  r[   r   r  r]  rH   r  r  r   rm   ry  r  r  )rz  r   r   r  rX  r  r  r  rM  r  meanm2weightr  rT  r  r  r   r  s    `` ``            @rv   ry  zWelfordReduction.create  s    !FFFF''**33MBR4ST	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
e ]222!N19(( 	 	2 $Ah
   $""

 
   	AIIK	%
s   -Ec                     y)N)r   r   r   r   r  s     rv   r  zWelfordReduction.default_value.  s     rx   c	                    t              t        j                  j                  j	                  t        j                  z  d             }	|	rH|dk7  rC	 	 	 	 	 	 	 	 d
fd}
 j                  ||d   t        |
d      t        |
d      f|d|      S t        dz
  z         t        j                  |t         fd|D              g |g||      }|D ]  }|j                           	 	 	 	 	 	 	 	 ddt        j                  j                  j                  t        |            } j                  ||      }t        j                  |t        fd	|D              |gd|      S )r  r   r  c                0    t        j                  |      S rq   rx  )r   r  r   r   s      rv   r  z4WelfordReduction.create_multilayer.<locals>.constantM  s     ||E511rx   r   r5   )r   r   r  rX  r  r  rT  r  c           	   3  L   K   | ]  }j                  |d         yw)r   )r  N)r  )r  r  r  rz  rM  r  rT  s     rv   r  z5WelfordReduction.create_multilayer.<locals>.<genexpr>e  s=      
  ++$# , 
s   !$c                     |g | |      S rq   r   )r   r~  r  s      rv   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fnx  s    
 4E4O455rx   c              3  T   K   | ]  }t        |j                                 ! yw))r  N)r
   r  )r  r   r  s     rv   r  z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s*       .q}}GG   %()r   r  r  r  r   rs   r   r_   )r   r  r~  r  r  r  r   r_   )r[   r`   r   r   r-   r   r  r  r
   r2   r  ry  r   r  r   r  )rz  r   r   r  rX  r  r  rT  r  r  r  intermediatesr   r   r  r  rM  s   ` `  ` `      @@@rv   r  z"WelfordReduction.create_multilayer4  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 (
 
 feL#
&  	AIIK		6!	6+	6 9	6 		6 WW%%//f0EF
99:~
  && &  G
 	
rx   N)r   r  r   r  r  Sequence[Callable[..., Any]]rX  r  r  r  r  rD   r  rH   r   r  r  )r   r  r   r  r  r  rX  r  r  r  r  rD   rT  ri   r  rH   r   r  )
r   r   r   r  rH   r  ry  r/  r  r  r   rx   rv   r  r    s    )6(=(=vv v 0	v
 v (v &v &v 
v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
Z
 Z
rx   r  c                  b    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   dd fdZd  fdZ	 	 	 	 	 	 	 	 	 	 d!dZd"dZd#dZd$dZ	d$dZ
d%dZd&dZdddZeej                   fdd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d(d       Z xZS ))Scanr  scan_rangesr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]rf  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rH   r  rs   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wrq   r[  r\  s     rv   r  z,Scan.get_free_symbol_uses.<locals>.<genexpr>       O"1m4Or^  c              3  6   K   | ]  }t        |        y wrq   r[  r\  s     rv   r  z,Scan.get_free_symbol_uses.<locals>.<genexpr>       H"1m4Hr^  )ri  r  r0   r_  r  r   r	  s    `rv   r  zScan.get_free_symbol_uses  sa    
 G(7 jl  Od>N>NO !jl  HdiiH		
rx   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y rq   )r   rX  r  r   ri  r\  rj  s    rv   r\  zScan.__post_init__  =    4;;#d&6&6"773tyy>IIIrx   c                   | j                  ||      t        fd| j                  D              }t        j                  | j
                  | j                  |      }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wrq   r   r  rW  r   s     rv   r  z'Scan.store_reduction.<locals>.<genexpr>       Dx}Dr  r  )	r   r   r  r^   r  r  rf  r  r  )rU  r  r  r  	scan_varsr   resultr   s          @rv   r  zScan.store_reduction  so     ll4+DT^^DD$++t?yy$9gclF4;L;L4M
 	
rx   c                     y)Ncustomr   r[  s    rv   r  zScan.get_reduction_type  s    rx   c                    | j                   S rq   )r  r[  s    rv   r  zScan.get_reduction_size  rm  rx   c                    | j                   S rq   r   r[  s    rv   r   zScan.get_size      yyrx   c                    | j                   S rq   rm  r[  s    rv   rv  zScan.get_pointwise_size  rr  rx   c                X    t        | j                        t        | j                        z   S rq   )r   rX  r  r[  s    rv   r  zScan.index_length  !    4;;#d&6&6"777rx   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS rq   )r  rX  r  r4   r  r   rU  r   r  r   s       rv   r  zScan.inner_fn_args  E    DKK(T--t}}=ll5&)vrx   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  rX  r  r4   r  r   r=   rW  rU  r   r   r  r   s        rv   r`  zScan.inner_fn_free_symbols  Q    DKK(T--t}}=ll5&)#DMM3mTTrx   T)can_fallback_to_atenc                  g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t        |      dkD  r=t         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t        j                  |||   ||   |      # c}S | j!                  ||d   |d   ||      \  }}t"        }|dkD  r[t$        j&                  j(                  d u xs t*        xr	 t,        dk\  xr t        |      dk(  }|s|rd gt        |      z  S d}nt.        }dfd}t        t        |            D cg c]0  }t0        j                   |d	|||   |||   ||||||d|	      2 }}|D ]  }|j3                           |S c c}w c c}w )
Nr5   r  r   )r   r   rW  axispointwise_rangesr  rf  
scan_numelz3.3.0c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S rq   r   )r   
scan_indexr  r   r  s     rv   r   zScan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>rx   )r   r   r  rW  r  r   rX  r  rf  r   r  r  )r   r  r$  r  r   r  r   )r`   r   r=  r8   SCANr   TUPLE_REDUCTIONr   r  r[   r-   r   Ler   r  ry  r]  r  r   versionhip
has_tritontriton_version	SplitScanrm   r  )rz  r   r  r  r   r  rf  r  r  r   r   r!  r  r]  	scan_typesupports_splitr   r  r  r   r  s        `             @@rv   ry  zScan.create  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''v;?177#6#6N22$
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !   &)^^)q\-#!! &4 	&
"
 	>!!T)Wj.V^w=V%v;!#  "' 6CK//!"J%		?. !&c&k 2%
$ #  ! .!&|4'+ +)##1!- 
 
*  	FNN	 N
s   )&I85Ic	           
     N    dfd}	t         j                  ||||	||d|      S )Nc                ,     g | d  || d        S rq   r   )r   r  r  rW  s     rv   r  z#Scan.num_splits.<locals>.wrapper_fnK	  s*    Fc%4jF=F3tu:FGGrx   r  )r   rL  r  rW  rX  r  r  rM  )r   r  r  r  r   r_   )r  r]  )
rz  r   r   rW  r  r   r  rf  r!  r  s
      ``     rv   r]  zScan.num_splits>	  s;    	H ###(!& $ 	
 		
rx   r  r  r  )
r  r  r  z%Callable[[Sequence[_IntLike]], Never]r  r  r  r  r   r   r  r)  r  r&  r  )r   r  r  r  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r   r  r  rs   rf  r  r  rH   r  r   r   r   r   Sequence[Optional[TensorBox]])r   r  r   r  rW  r  r  rs   r   r  r  r  rf  r  r!  r   r   r  )r   r   r   r   r  r\  r  r  r  r   rv  r  r  r`  r  rH   r  ry  r]  r  r  s   @rv   r  r    s   
MMSS!!##--
 
"
 7
 	

 $
 

 8U  )6(=(=_ &*__ (_ ?	_
 _ _ R_ &_ #_ _ 
'_ _B 

 
 7	

 
 (
 #
 R
 
 
(
 
rx   r  c                      e Zd Zy)r-  Nr   r   r   r   rx   rv   r-  r-  [	  s    rx   r-  c                  0    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   dd fdZd fdZ	 	 	 	 	 	 	 	 	 	 ddZd dZd!dZd!dZ	d!dZ
d"dZd#dZdddZeej                   f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$d       Z xZS )%Sortr  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rH   r  rs   r  r  r  r   r  r   stable
descendingc                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wrq   r[  r\  s     rv   r  z,Sort.get_free_symbol_uses.<locals>.<genexpr>u	  r  r^  c              3  6   K   | ]  }t        |        y wrq   r[  r\  s     rv   r  z,Sort.get_free_symbol_uses.<locals>.<genexpr>x	  r  r^  )ri  r  r0   r_  r7  r   r	  s    `rv   r  zSort.get_free_symbol_usesq	  s_    G(7 jl  Od>N>NO !jl  HdiiH		
rx   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y rq   )r   rX  r7  r   ri  r\  rj  s    rv   r\  zSort.__post_init__|	  r  rx   c                ,   | j                  ||      t        fd| j                  D              }t        j                  | j
                  || j                  | j                        }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wrq   r   r
  s     rv   r  z'Sort.store_reduction.<locals>.<genexpr>	  r  r  r  )
r   r   r  r^   sortr  r8  r9  r  r  )rU  r  r  r  r  r   r  r   s          @rv   r  zSort.store_reduction	  su     ll40DT^^DD$++vt{{DOOLyy$9gclF4;L;L4M
 	
rx   c                     y)Nr@  r   r[  s    rv   r  zSort.get_reduction_type	  s    rx   c                    | j                   S rq   )r7  r[  s    rv   r  zSort.get_reduction_size	  rm  rx   c                    | j                   S rq   r  r[  s    rv   r   zSort.get_size	  r  rx   c                    | j                   S rq   rm  r[  s    rv   rv  zSort.get_pointwise_size	  rr  rx   c                X    t        | j                        t        | j                        z   S rq   )r   rX  r7  r[  s    rv   r  zSort.index_length	  r  rx   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS rq   )r  rX  r7  r4   r  r   r  s       rv   r  zSort.inner_fn_args	  r  rx   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  rX  r7  r4   r  r   r=   rW  r  s        rv   r`  zSort.inner_fn_free_symbols	  r  rx   c	                   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }d}t        j                  j                  xr% |
j                  t        j                  ||            }|sd gt        |      z  S t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t         j#                  |||   ||   |      # c}S dfd}t        t        |            D cg c]4  }t$        j#                  t'        d|||   |||   |||||||d|	      6 }}|D ]  }|j)                           |S c c}w c c}w )Nr5   r  r  c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S rq   r#  )r   
sort_indexr  r   r7  s     rv   r   zSort.create.<locals>.reindex	  r%  rx   )r   r   r  rW  r  r   rX  r7  r   r  r  r8  r9  )r   r  rJ  r  r   r  r   )r`   r   r=  r8   SORTr   r   r  r[   r6   r  persistent_reductionsr-   r   r(  r   r  ry  rm   r6  r  )rz  r   r  r  r   r  r8  r9  r  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r  r  r   r7  s        `            @@rv   ry  zSort.create	  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''77##&&}['AB
 
MM// Q..uxx
J/OP 	 $6CK''6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !  	?0 !&c&k 2'
& %  ! .!&|4'+ +##1!-!) 
 
,  	FNN	 Q
s   2&G99Gr  r  r  )
r  r  r  r  r  r  r  r  r   r   r  r  r&  r  )r   r  r  r  r  z'tuple[Callable[[list[Expr]], Any], ...]r   r  r  rs   r8  r   r9  r   r  rH   r   r   r   r2  )r   r   r   r   r  r\  r  r  r  r   rv  r  r  r`  r  rH   r  ry  r  r  s   @rv   r6  r6  `	  s*    
GG!!##--L	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lrx   r6  c                >    	 t        | d       y# t        $ r Y yw xY w)NFfreezeT)r+  r  rt   s    rv   r   r   	  s&    a. s    	c                    	 t        | d      \  }}|j                         r|j                          |j                         S # t        $ r Y yw xY wNFrQ  )r+  should_pad_stridespad_stridesis_contiguousr  )ru   _bufferr*  s      rv    is_contiguous_storage_and_layoutrY  
  sR    /%@ $$& ##%% s   ?A 	AAc                   t        | t              rt        | j                  |||||      S t        | t              r:t        | j                  |||||      \  }}| | j                  j                         fS t        | t              r|rn|r0| j                          | j                         j                         s>J || j                  ||       n&|| j                  ||       n| j                          t	        |       | j                         fS t        | t              r(t        | j                  |      \  }}|| j                  fS t        )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    rR  want_contiguousstride_orderr  r  r  rQ  )rr   rm   r+  r)  
StorageBoxr   Bufferr  rW  r  r  r1  r.  r*  r  )	ru   rR  r\  r]  r  r  r   r*  buffers	            rv   r+  r+  
  sG    !Y$FF+%''
 	
 !Z )FF+%''
	6 !&&##%%%!V!||~33555)11  2  *22! 3  !!}alln,,!_% *FF
	 qxx
rx   c                d    	 t        | d      \  }}|j                  |      S # t        $ r Y yw xY wrT  )r+  is_stride_orderedr  )ru   r]  rX  r*  s       rv   "is_stride_order_storage_and_layoutrd  I
  s:    /%@''55 s    # 	//c                   t        | t        t        f      rt        | j                        S t        | t
              r[| j                  }t        |j                  t        |j                        z  t        z  dk(         }t        | j                        xs |S t        | t              r*| j                         t        j                  j                   v S y)Nr   F)rr   rm   r_  is_unalignedr)  r.  r*  r-   r-  rR   r   rT   r`  r  r`   r   unaligned_buffers)r   r*  has_unaligned_layouts      rv   rf  rf  S
  s    $J/0DII&&$(#8MMN6<<88?JaO$
  
 DII&>*>>$}}!''";";;; rx   c                      e Zd ZU ded<   dddZddZddZddZedd       Z	ddZ
d d	Zd!d
Zd"dZd#dZd$dZd%dZd&dZd Zd Zd%dZd%dZd'dZd(dZd Zd)dZy)*BaseViewrn   r)  c                8    | j                   j                  |      S rq   r)  r  r  s     rv   r  zBaseView.get_free_symbol_usesi
  s    yy--m<<rx   c                    t        d|        )Nzmake_reindexer NYI on r9  r[  s    rv   make_reindexerzBaseView.make_reindexerl
  s    !$:4&"ABBrx   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S rq   r   r   innerr   s    rv   r  z&BaseView.make_indexer.<locals>.indexers
      &&rx   )r   r  r   r   )r)  r  rn  )rU  r  rr  r   s     @@rv   r  zBaseView.make_indexero
  s/    		&&(%%'	' rx   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S rq   r   rq  s    rv   r  z$BaseView.make_loader.<locals>.loader|
  rs  rx   r  )r)  r  rn  )rU  r  rr  r   s     @@rv   r  zBaseView.make_loaderx
  s/    		%%'%%'	' rx   c                6    | j                   j                         S rq   )r)  r   r[  s    rv   r   zBaseView.dtype
  s    yy""$$rx   c                6    | j                   j                         S rq   r)  r   r[  s    rv   r   zBaseView.get_layout
      yy##%%rx   c                6    | j                   j                         S rq   r)  r   r[  s    rv   r   zBaseView.get_device
  ry  rx   c                     y rq   r   r[  s    rv   rl  zBaseView.get_origin_node
  rp  rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.get_name
      yy!!##rx   c                "    | j                         S rq   r  r[  s    rv   rv  zBaseView.get_pointwise_size
      }}rx   c                8    | j                   j                  |      S rq   r)  r  r  s     rv   r  zBaseView.mark_reuse
      yy##E**rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.has_exceeded_max_reads
      yy//11rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.realize
      yy  ""rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.realize_hint
      yy%%''rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.get_storage_numel
      yy**,,rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zBaseView.is_extern
      yy""$$rx   c                6    | j                   j                         S rq   )r)  is_module_bufferr[  s    rv   r  zBaseView.is_module_buffer
      yy))++rx   c                6    | j                   j                         S rq   r)  rf  r[  s    rv   rf  zBaseView.get_read_names
      yy''))rx   c                    t        j                  t        dd      5  t        | j	                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )r   r   r   r?   r  r   r  r[  s    rv   re  zBaseView.get_reads
  sL    \\.*:DA 	&  " e		 	 	s   2AA!c                d    | }t        |t              r|j                  }t        |t              r|S rq   )rr   rj  r)  )rU  ru   s     rv   r  zBaseView.unwrap_view
  s+    H%A H%rx   c                    | j                         } t        j                  t        d|      |      }t	        || j                         || j                               S r  )r  r   r   r  r  r   r   r  s      rv   r  zBaseView.constant_to_device
  sP    !!#Hn.?HP.."==?	
 	
rx   Nr  r  )r   z*Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r  r  r  r  r  r  r  r  r  r  r%  r+  )r   r   r   r   r  rn  r  r  r2  r   r   r   rl  r  rv  r  r  r  r  r  r  r  rf  re  r  r  r   rx   rv   rj  rj  e
  s    
L=C % %&&$+2#(-%,*	
rx   rj  c                  F    e Zd ZU ded<   ed        Zed        ZddZd Z	y)	r   r  r   c                r   t         j                  j                  }t        t	        t
        j                  |            }| j                         }dgt        |      t        |      z
  z  t        |      z   }t        |      t        |      k(  sJ t        t        |            D ]  }||   dk(  r||   J ||   ||<   ||   Lt         j                  j                  j                  j                  t        j                  ||   d      d      rm|j                  ||   ||   z
  d      dk(  rJ d        |S )	zReplace `-1` with correct sizesNr  r5   Tsize_obliviousr   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})r`   r   r   r   ry  r   ri  r   r   r   r   evaluate_exprr  r   )ru   new_sizer   old_sizer   s        rv   _normalize_sizezExpandView._normalize_size
  s1    77##ELL(34::<6S]S]:;d8nL8}H---s8}% 	A{b {...&qk!$(8(8(B(B(P(P!a( )Q )   ))(1+*Ca)PTUU aU	" rx   c           	        | j                  ||      }t        |      r3t        |      \  }}t        |      t        |j                        z
  }|dk\  sJ t
        j                  j                  g|z  }t        |j                  |j                        D ]y  \  }}|j                  t        j                  j                  j                  j                  t        j                   |d      d      s|nt
        j                  j                         { t#        |j$                  |j&                  t)        |      ||j*                        }	t-        ||	      S t/        ||      S )Nr   r5   Tr  r(  )r)  r   )r  r   r+  r   r   r   r  r  r   r   r/  r`   r   r   r   r  r  r,  r   r   r   r-  r.  r   )
rz  ru   r  r1  r2  skipr3  r   r   r4  s
             rv   ry  zExpandView.create
  s'   &&q(3 #"7":GZx=3z#77D199'',,$.J #J$5$5z G !!77++55CCq)$ D    %!!  X!!J #
CCqx00rx   c                    | j                   S rq   r  r[  s    rv   r   zExpandView.get_size   r  rx   c                    | j                         }| j                  j                         t        |      t              z
  fd}|S )Nc                    t        | d        } t        |       t              k(  sJ t        t                    D ](  }|   dk(  st        j                  j
                  | |<   * | S r~  )r   r   r   r   r  r  )r   r   actualr  s     rv   r   z*ExpandView.make_reindexer.<locals>.reindex  sd    tu&Eu:V,,,3v;' ,!9>$ww||E!H, Lrx   )r   r)  r   )rU  targetr   r  r  s      @@rv   rn  zExpandView.make_reindexer  s>    ##%6{S[(	 rx   Nr  )
r   r   r   r   r/  r  r  ry  r   rn  r   rx   rv   r   r   
  s8    
 4 1 16rx   r   c                  F    e Zd ZU ded<   ed        Zed        ZddZd Zy)	PermuteViewr  dimsc           
        | j                  |      }t        |      t        t        t        |                  k(  sJ t	        |      r~t        |      \  }}t        |j                  |j                  |D cg c]  }|j                  |    c}|D cg c]  }|j                  |    c}|j                        }t        ||      S t        ||      S c c}w c c}w )Nr(  )r)  r  )_map_neg_dimsr0   r   r   r   r+  r,  r   r   r   r   r-  r.  r  )rz  ru   r  r1  r2  r   r4  s          rv   ry  zPermuteView.create  s      &$:eCI.>#???? #"7":GZ$!!  -12#2/34!""1%4!!J #
CC-- 34s   5CCc                R    |D cg c]  }|dk\  r|nt        |      |z    c}S c c}w r  r#  )rz  r  r  s      rv   r  zPermuteView._map_neg_dims*  s)    @DEsaxSY_4EEEs   $c                   t        | j                  | j                              t        t        t	        | j                                    k(  sJ | j
                  j                         }| j                  D cg c]  }||   	 c}S c c}w rq   )r0   r  r  r   r   r)  r   )rU  r   r   s      rv   r   zPermuteView.get_size.  so    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   7Bc                B   t        | j                        D ci c]  \  }}||
 c}}t        t        | j                              D cg c]  }|   	 c}t	              t	        t        t        | j                                    k(  sJ fd}|S c c}}w c c}w )Nc                4    D cg c]  }| |   	 c}S c c}w rq   r   )r   r   invs     rv   r   z+PermuteView.make_reindexer.<locals>.reindex:  s    &)*E!H***s   )r   r  r   r   r0   )rU  r   r[  r   r  s       @rv   rn  zPermuteView.make_reindexer5  s}     )$)) 451q!t5$S^45!s1v5#*U3tyy>-B"CCCC	+  65s   BBNr  )	r   r   r   r   r  ry  r  r   rn  r   rx   rv   r  r    s:    
. ." F F,rx   r  c                  <    e Zd Zeddd       Zedd       ZddZy)SqueezeViewN)r  c          	        t        |      rt        |      \  }}g }g }|6t        |t              sJ d       d|k  r|t	        |j
                        k  sJ t        t        |j
                  |j                              D ]g  \  }\  }}	|)|dk7  s|j                  |       |j                  |	       4||k7  r#|j                  |       |j                  |	       \|dk(  rbJ d        t        |j                  |j                  |||j                        }
t        ||
      S |8t        j!                  ||j#                         D cg c]
  }|dk7  s	| c}      S |j#                         |   dk(  sJ t        j!                  |t        |j#                               D cg c]  \  }}||k7  s| c}}      S c c}w c c}}w )Nzexpected integer dim argumentr   r5   zexpected squeezed size to be 1r(  )r   r+  rr   rs   r   r   r   r   r   r/  r,  r   r   r-  r.  r  ry  r   )rz  ru   r  r1  r2  r  r3  r   r   r   r4  r   s               rv   ry  zSqueezeView.createB  s    #"7":GZHJ!#s+L-LL+CxC#joo*>$>>>%.s:??JDUDU/V%W 
K!>D&;qy -"))&1Cx -"))&1#qyJ*JJy
K %!!  !!J #
CC;;;qajjl"Ea1f1"EFF::<$)));;q1::<1H"UAAQTH1"UVV #F #Vs   
F<
F<
"G0Gc                    | D cg c]
  }|dk7  s	| }}t        |       D cg c]  \  }}|dk7  s| c}}t        |       dfd}||fS c c}w c c}}w )Nr5   c                    t        |       t              k(  sJ |  d        t        j                  j                  gz  }t	        |       D ]
  \  }}|||<    t        |      S )N )r   r   r  r  r   r   )r   r  r   r   lengthnot_ones       rv   r   z%SqueezeView.squeezer.<locals>.reindexn  sk    u:W-C%'/CC-/Igu- #Q!"	####rx   )r   zlist[sympy.Expr]r   ztuple[sympy.Expr, ...])r   r   )r   r   r  r   r   r  r  s        @@rv   squeezerzSqueezeView.squeezerh  s]    #.!qAvA..!*4;AAF1;T	$    /;s   
AAAAc                    t        d      )Nzuse SqueezeView.create())AssertionError)rU  r)  s     rv   r  zSqueezeView.__init__w  s    788rx   )r   r*  r  )r   r   r   r  ry  r/  r  r  r   rx   rv   r  r  @  s3    " #W #WJ ! !9rx   r  c                  T    e Zd ZU ded<   ded<   d ZddZddZeZed        Z	dd	Z
y
)GenericViewr  r   rV  r   c                    | j                   S rq   )r   r[  s    rv   rn  zGenericView.make_reindexer      ||rx   c                   t        t        | j                              D cg c]  }t        t        j
                  |       }}t        | j                  |            }ddj                  t        t        |             d| S c c}w )Nzlambda , r   )r   r   r   rZ   r4   r  r   r   r{  ry  r   )rU  r  	index_old	index_news       rv   reindex_strzGenericView.reindex_str  ss    CHTYYCX
>?*4::q9
	 
 i01	3sI#6789+FF	
s   !Bc                z    | j                  | j                  d| j                   d| j                          g      S )Nsize=zreindex=)r  r)  r   r  r[  s    rv   ro  zGenericView.__str__  s=    YY%		{+x8H8H8J7K-LM
 	
rx   c                *     | |t        |      |      S )Nr)  r   r   )r   )rz  ru   r  r   s       rv   ry  zGenericView.create  s    X@@rx   c                    | j                   S rq   r  r[  s    rv   r   zGenericView.get_size  r  rx   Nr  r  )r   r   r   r   rn  r  ro  r  r  ry  r   r   rx   rv   r  r  {  s>    
G

 HA Arx   r  c                  t    e Zd Zed        Zed        Zed        Ze	 d	 	 	 	 	 	 	 dd       Zedd	d       Z	y)
r  c                    t        j                  |       } t        j                  |      }t        j                  j                  j
                  j                  } |t        j                  | d            r| |z   } | S r  )r   ri  r`   r   r   r   r  Lt)r   r   r  s      rv   handle_negative_indexzView.handle_negative_index  sZ    ll3||D!((22@@#q)**C
rx   c                  	 t        |t        t        f      sJ | j                  |j	                         |      \  	}t
        j                  j                  j                  	|      r|S d}t        t        	            dkD  st        t        |            dkD  rd}d|v r	fd} | |t        |      |      S t        |      s|r|r t        |      st        j                  |      }t        |d      \  }}t        |j                   |j"                  |t$        j'                  |      |j(                        }t+        ||      S | j-                  	|      } | |t        |      |      S )NFr   Tc                4    t        dgt              z        S r  )r   r   )r   r  s    rv   fake_reindexz!View.create.<locals>.fake_reindex  s    aS3x=011rx   r  )r\  r(  )rr   r   r   resolve_negative_sizer   r`   r   r   statically_known_list_equalsr   r(   rY  ExternKernelrequire_contiguousr+  r,  r   r   r   r   r-  r.  r  )
rz  ru   r  unbacked_symbols_in_sizesr  r1  r2  r4  r   r  s
            @rv   ry  zView.create  sL   (UDM222 66qzz|XN( 77888LH$)!%h/014(23a7(,%=2 ADNLII-a04M(2RST2U !33A6"74"PGZ$!!  11(;!!J #
CC--hAX@@rx   c                F   |D cg c]+  }t         j                  j                  j                  |      - }}| D cg c]+  }t         j                  j                  j                  |      - } }t	        |      }t        t        |            D ]J  }||   dk(  st        j                  j                  ||<   t        t        |       t        |            ||<    n t         j                  j                  j                  t        |       t        |             | |fS c c}w c c}w )Nr  )r`   r   r   r  r   r   r   r   r  Oner1   r[   guard_equals)r  r  ru   r   s       rv   r  zView.resolve_negative_size  s    :BCQAGG$$--a0CC:BCQAGG$$--a0CC>s8}% 	A{b #ggkk&}X'>h@WX		 	
%%mH&=}X?VW!! DCs
   0D0DNc                    	 | j                  |||      }|S # t        t        f$ r@ t        |      g}| j                  ||      }| j                  ||      }t	        ||      }Y |S w xY wrq   )_dynamic_reshape_indexerr  
IndexErrorr[   r   )rz  r  r  	dense_dimr   flatr   r   s           rv   r  zView.dynamic_reshape_indexer  sz    	:228XyQG  
+ 	:!(+,D33HdCH33D(CH%h9G	:s    AA&%A&c                j   t         j                  j                  j                  }t	        t        |            D cg c]  }t        t        j                  |       c}t        t        |            }t        |       }|duxr! |t        |      dz
  k7  xr t        |      dk(  }|r&|J |j                  |      }|j                  |       g |r=|r:|j                         }	|j                         \  }
}|	dk(  r>j                  t        j                  j                         |j                  |
|f       n|dk(  r|j                  |	       n ||       ||	      k(  r=j                  |
       t         j                  j                  j!                  ||	       nh ||       ||	      k  r ||       ||	      k  r2|j                         \  }}||z  |
z   }
||z  } ||       ||	      k  r2j                  |
       t         j                  j                  j!                  ||	       n ||       ||	      kD  rt        j                  j"                  }|	}j                  t%        |
||             ||z  } ||       ||	      kD  rH|j                         }j                  t%        |
||             ||z  }|	|z  }	 ||       ||	      kD  rHt         j                  j                  j!                  ||	       nt&        |r|r:|rf|j                         }	t         j                  j                  j!                  |	d       j                  t        j                  j                         |rf|r@|j                         \  }
}t         j                  j                  j!                  |d       |r@|At        |      dk(  r3j)                          j                         }j+                  ||       nj)                          t              t        |       k(  sJ fd}|S c c}w )zG
        Perform a reshape entirely by modifying indexing math
        Nr5   c                    t        |       t              k(  sJ t        |       t              f       t        t        |             t        fdD              S )Nc              3  6   K   | ]  }t        |        y wrq   )r\   )r  ru   replacementss     rv   r  zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>B  s     HA|4Hr^  )r   r   r   r   )r   r  r  	view_exprs    @rv   r   z.View._dynamic_reshape_indexer.<locals>.reindex?  sM    u:T*CSZT,CC*D% 01LHiHHHrx   )r`   r   r   r   r   r   rZ   r4   VIEWr   r   rx  r/  r   r  r  r  r  r3   r  reverseinsert)r  r  r  r   r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r  r  s                     @@rv   r  zView._dynamic_reshape_indexer  s   
 GG$$..	 CHHBV
=>*499a8
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #x1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD)/C/C')3H  )Ih,??   %  --hA8$y'::''++"  gw!GH!G+)Ih,??'mmoG$$_S'7%KL%/G''1H	  )Ih,??
   --hA$$= I@  }}HGG))(A6UWW\\* 
 %MMOMCGG))(A6   S]a%7"JY
39~X...	I
 W
s   !P0rq   )r  r  r  r  r  r  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r  )
r   r   r   r/  r  r  ry  r  r  r  r   rx   rv   r  r    s      *A *AX " " 
 $(	$ % !	
 
0   R Rrx   r  c                       e Zd ZU dZded<   d fdZddZeZddZddZ	ddZ
ed	        Zdd
Zd ZddZddZddZd Z	 d	 	 	 ddZdddZddZ xZS ) r.  z*Pretend our storage has a different layoutr  r*  c                    t         |           t        | j                  t              r0t
        j                  | d| j                  j                                y y )Nr)  )ri  r\  rr   r)  rj  r   rT  r  rj  s    rv   r\  zReinterpretView.__post_init__M  s@    dii*tVTYY-B-B-DE +rx   c                P    | j                  | j                  | j                  g      S rq   )r  r)  r*  r[  s    rv   ro  zReinterpretView.__str__R  s&    		
 	
rx   c                6    | j                   j                         S rq   r~  r[  s    rv   r  zReinterpretView.get_name\  r  rx   c                .    | j                   j                  S rq   )r*  r   r[  s    rv   r   zReinterpretView.get_device_  s    {{!!!rx   c                     y rq   r   r[  s    rv   rl  zReinterpretView.get_origin_nodeb  rp  rx   c                .    | j                   j                  S rq   )r*  r   r[  s    rv   r   zReinterpretView.dtypee  s    {{   rx   c                @    t        | j                  j                        S rq   )r   r*  r   r[  s    rv   r   zReinterpretView.get_sizei  s    DKK$$%%rx   c                @    t        | j                  j                        S rq   )r   r*  r   r[  s    rv   r  zReinterpretView.get_stridel  s    DKK&&''rx   c                     d fd}|S )Nc                T   j                   j                         }t        j                  j	                          ||             }j                   j
                  j                  j
                  k7  r5t        j                  |j
                  j                  j
                        S |S rq   )r*  r  r^   loadr  r   r)  to_dtype_bitcast)r   r  
tmp_loaderrU  s      rv   r  z+ReinterpretView.make_loader.<locals>.loaderp  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!rx   r   r  r   r_   r   rU  r  s   ` rv   r  zReinterpretView.make_loadero  s    	" rx   c                6    | j                   j                         S rq   )r*  r  r[  s    rv   r  zReinterpretView.make_indexerz      {{''))rx   c                    | j                   S rq   r*  r[  s    rv   r   zReinterpretView.get_layout}  rr  rx   c                     y rq   r   r[  s    rv   r  zReinterpretView.freeze_layout  r  rx   c                    t        | j                  j                  |      t        | j                  j                  |      z  t        | j                  j                  |      z  S rq   )r   r*  r   r   r-  r  s     rv   r  z$ReinterpretView.get_free_symbol_uses  sQ     T[[--}=t{{11=ABt{{11=AB	
rx   c                t   t         j                  j                  j                  | j                  | j
                  j                  | j
                  j                  | j
                  j                  ||j                  n#t         j                  j                  j                  | j
                  j                        S r  )r`   r   wrapper_codecodegen_reinterpret_viewr)  r*  r   r   r-  	writeliner   r  s     rv   r  z!ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
rx   c                     yr~  r   r[  s    rv   r  zReinterpretView.num_reads      rx   r  r  r  r  r  r  r  r  r  r(  rq   r  r&  )r   r   r   __doc__r   r\  ro  r  r  r   rl  r2  r   r   r  r  r  r   r  r  r  r  r  r  s   @rv   r.  r.  G  s    4NF

 H$" ! !&(	* %*
!
	!

rx   r.  c                  X    e Zd ZU dZded<   ed        Zd
dZeZe	d        Z
ddZddZy	)	DtypeViewz(Pretend our storage has a different typer  target_dtypec                    t        |      rRt        |      \  }}t        |j                  ||j                  |j
                  |j                        }t        ||      S t        ||      S )Nr(  )r)  r  )	r   r+  r,  r   r   r   r-  r.  r  )rz  ru   	new_dtyper1  r2  r4  s         rv   ry  zDtypeView.create  sd     #"7":GZ$!!!!!!J #
CCai88rx   c                P    | j                  | j                  | j                  g      S rq   )r  r)  r  r[  s    rv   ro  zDtypeView.__str__  s     		4+<+<=>>rx   c                    | j                   S rq   )r  r[  s    rv   r   zDtypeView.dtype  s       rx   c                6    | j                   j                         S rq   r)  r   r[  s    rv   r   zDtypeView.get_size  r  rx   c                J      j                   j                          fd}|S )Nc                z    t        j                   |       j                  j                  j                        S rq   )r^   r  r  r)  r   )r   rr  rU  s    rv   r  z%DtypeView.make_loader.<locals>.loader  s*    ''c
D4E4EtyyWWrx   r)  r  )rU  r  rr  s   ` @rv   r  zDtypeView.make_loader  s"    		%%'	X rx   Nr  r  r  )r   r   r   r  r   r  ry  ro  r  r2  r   r   r  r   rx   rv   r  r    sE    29 9? H! !$rx   r  c                  .    e Zd Zed        Zedd       Zy)	SliceViewc                T   	
 t         j                  j                  
|j                         |   t	        d ||fD              r!t
        j                  	t
        j                  n
j                  	
j                  	
fd fd} ||dd      } |||      }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  2   K   | ]  }t        |        y wrq   )r(   r  ru   s     rv   r  z0SliceView.normalize_start_end.<locals>.<genexpr>  s     HA$Q'H   c                    j                  | |      r| n | |      }j                  ||      r|}|S  ||      }|S rq   )statically_known_geqr  )ru   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr   s        rv   clampz,SliceView.normalize_start_end.<locals>.clamp  s`    221e<(1eBT 
 00F  
   mU3 
  rx   c                D    | |S j                  |       }  | ||      S rq   )r  )ru  r+  r,  r  r1  rz  dim_sizes       rv   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrap  s.    {++C:CeU++rx   r   )
r`   r   r   r   r  r   MinMaxevaluate_minevaluate_max)rz  ru   r  startendr4  r1  r3  r/  r0  r   s   `     @@@@@rv   normalize_start_endzSliceView.normalize_start_end  s     77##::<$H%h1GHHyyHyyH,,H,,H		 	, 5!Xq1eXx8czrx   c           	        t        j                        t        t         j                        sdkD  sJ 	 dk(  r|dk\  rdk(  r|S t        |j                               |r| j                  ||      \  }t        |z
  dz
  z         <   t        |      r{t        |      \  }}t        |j                        }	|	   z  |	<   t        |j                  |j                  |	|j                  |j                     z  z         }
t!        ||
      S fd}t#        ||      S # t        $ r Y w xY w)Nr   l    r5   r(  c                    t        |       t              k(  sJ d|  d        t        |       } |    z  z   | <   | S )Nzwrong ndim r  )r   r   )r   r  r  r9  steps    rv   r   z!SliceView.create.<locals>.reindex  sP    u:X.P+eWAhZ0PP.KEsd*U2E#JLrx   r  )r   ri  rr   r   	TypeErrorr   r   r;  r2   r   r+  r   r,  r   r   r-  r.  r$  )rz  ru   r  r9  r:  r>  r1  r1  r2  r3  r4  r   r  s     `` `      @rv   ry  zSliceView.create  sJ   ||D!$

+tax77	zcY.419 

%
 00CDJE3 uq!94@ #"7":GZj//0J(o4JsO$!!  !!J$5$5c$:U$BBJ #
CC	 ah@@E  		s   D3 3	D?>D?N)r5   T)r   r   r   r  r;  ry  r   rx   rv   r$  r$    s+    " "H (A (Arx   r$  c                  B    e Zd ZU ded<   ded<   d
dZddZddZddZy	)BaseConstantr  r   r  r   c                     yNr   r   r[  s    rv   r   zBaseConstant.get_size  s    rx   c                    | j                   S rq   rq  r[  s    rv   r   zBaseConstant.get_device   rr  rx   c                     y rq   r   r[  s    rv   rl  zBaseConstant.get_origin_node#  rp  rx   c                    t               S rq   r/   r[  s    rv   re  zBaseConstant.get_reads&  rM  rx   Nr  r  r  r%  )r   r   r   r   r   r   rl  re  r   rx   rv   rA  rA    s"    rx   rA  c                  D    e Zd ZU ded<   ded<   ded<   ddZddZdd	Zy
)Constantr   r   r  r   r  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S rq   )r^   r  r   r   r   rU  s    rv   r  z$Constant.make_loader.<locals>.loader1  s    <<

DJJ77rx   r  r   r	  s   ` rv   r  zConstant.make_loader0  s    	8 rx   c                     y rq   r   r[  s    rv   r  zConstant.realize6  r  rx   c                F    t        | j                  | j                  |      S )N)r   r   r   )rH  r   r   r  s     rv   r  zConstant.constant_to_device9  s    djj

6JJrx   Nr  r  r+  )r   r   r   r   r  r  r  r   rx   rv   rH  rH  *  s#    JKrx   rH  c                  <    e Zd ZU ded<   ded<   ded<   d
dZddZy	)IndexingConstantr   r   r  r   r  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S rq   )r^   rj  r   r   rK  s    rv   r  z,IndexingConstant.make_loader.<locals>.loaderD  s    >>$**djj99rx   r  r   r	  s   ` rv   r  zIndexingConstant.make_loaderC  s    	: rx   c                F    t        | j                  | j                  |      S )N)r   r   r   )rO  r   r   r  s     rv   r  z#IndexingConstant.constant_to_deviceI  s    djj

6RRrx   Nr  r+  )r   r   r   r   r  r  r   rx   rv   rO  rO  =  s    JSrx   rO  c           	     b    t        d t        | t        j                  |      |      D              S )Nc              3  <   K   | ]  \  }}}|d k(  xs ||k(    ywr:  r   )r  leftrightr   s       rv   r  z2is_contiguous_strides_for_shape.<locals>.<genexpr>P  s.      D% 		"TU]"   )r  r   r   r   )r   r  s     rv   is_contiguous_strides_for_shaperX  M  s5      !$N55e<e"
  rx   c                <    t         j                  | j                  z  S rq   )r6   padding_alignment_bytesitemsizer  s    rv   get_align_for_dtyper\  X  s    ))U^^;;rx   c                       e Zd ZdZddZddZy)r   zxAbstract base for Layout, MultiOutputLayout, NoneLayout.
    Represents the memory layout of the output of an Operation.c                >    t        t        |       j                        rq   r  r[  s    rv   r   zOutputSpec.get_device`  r  rx   c                >    t        t        |       j                        rq   r  r[  s    rv   storage_sizezOutputSpec.storage_sizec  r  rx   Nr  r&  )r   r   r   r  r   r`  r   rx   rv   r   r   \  s    C77rx   r   c                      e Zd Zd ed      f	 	 	 	 	 	 	 	 	 	 	 ddZddZeZddZddZddZ	e
	 	 	 	 	 	 dd       Zdd	Zdd
Zd Ze
d        Zd Zd Zd ZddZddZddZy)r  Nr   c                    |t         j                  |      }|| _        || _        t	        |      t	        |      k(  sJ d| d|        t        d |D              sJ || _        || _        || _        y )Nr  	, stride=c              3  H   K   | ]  }t        |t        t        f        y wrq   )rr   r   rs   r;  s     rv   r  z"Layout.__init__.<locals>.<genexpr>v  s     <!:a$-<    ")	r   r   r   r   r   r  r   r   r-  )rU  r   r   r   r   r-  s         rv   r  zLayout.__init__i  sx     >#66t<F
4yCK'H5ix)HH'<t<<<< $	"("rx   c                X   d}| j                   dk7  rd| j                    }| j                  j                  dnd| j                  j                   }t        |       j                   d| j                  j                   | d| j
                   d| j                   d| j                   | d	S )
Nrr  r   z	, offset=:z('z', z, size=rc  r   )r-  r   r   r   r   r   r   r   )rU  r-  device_index_strs      rv   ro  zLayout.__str__{  s    ;;! .F!%!2!2!:2!DKKDUDUCV@WDz""#2dkk&6&6%78H7ITZZL YII;i}VHA?	
rx   c                    | j                   S rq   rq  r[  s    rv   r   zLayout.get_device  rr  rx   c                    t         j                  5  t        j                  t	        | j
                        t	        | j                        | j                  | j                        cd d d        S # 1 sw Y   y xY w)Nr   r   )	r`   	fake_moder   r   rN   r   r   r   r   r[  s    rv   get_examplezLayout.get_example  sR    [[ 	&&'		2'4jj{{		 	 	s   AA..A7c                B    t        | j                  | j                        S rq   )rX  r   r   r[  s    rv   rW  zLayout.is_contiguous  s    .t{{DIIFFrx   c                    t        |       }|dvs| d   dk(  ryt        |t        |       |       D ]  \  }}}|dk7  s||k7  s y y)N)r      r5   FT)r   r   r"   )r  r0  ndimrU  rV  r   s         rv   is_channels_last_contiguousz"Layout.is_channels_last_contiguous  sa     5zvqQ!$3E:E"
 	D% qyTU]		
 rx   c                    t        | j                  t        t        j	                  t        t        | j                                          | j                        D ]  \  }}}|dk7  s||k7  s y y)Nr5   FT)r   r   reversedr   r   r   r   )rU  rU  rV  r   s       rv   is_transposedzLayout.is_transposed  sc    !$KK^66tHTYY<O7PQRII"
 	D%
 qyTU]	 rx   c                   t        | j                        t        |      k(  sJ t        | j                        D cg c]5  \  }}t        j
                  j                  j                  |d      dk7  r|7 }}}|D cg c]  }| j                  |    }}|D cg c]  }||   	 }}d } ||      }dgt        |      z  }t        t        |            D ]  }||   |||   <    t        t        |      dz
        D ][  }||   ||dz      kD  }t        |t              s7t        j
                  j                  j                  ||   ||dz      kD  d      }|s[ y yc c}}w c c}w c c}w )	Nr   r  r5   c                `    t        |       }| D cg c]  }|j                  |       c}S c c}w rq   )r.  r   )arr
sorted_arrelements      rv   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices  s*    J=@A'J$$W-AAAs   +r  Tr  F)r   r   r   r   r`   r   r   r   r   rr   r   
_shape_envr  )	rU  r   r   r  non_1_indicesr   r{  stride_orderedexprs	            rv   rc  zLayout.is_stride_ordered  s~   4;;3u:---
 $DII.
3ww))#):a? 
 
 +88Q$++a.88#01aq11	B
 u% E
*s5z" 	1A'-ayN58$	1 s5zA~& 	A!!$~a!e'<<DdD)ww))77"1%q1u(==d 8  	 ;
 91s   :E=E!E&c                    dgt        t        t        dt        | j                        dz
                    z   }t        |      g|z   }| j                  |      S Nr   r5   )r   rt  r   r   r   rc  r  s     rv   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,rx   c                   t        |      }t        |       dk(  r| S t        j                  st        j                  ||       r| S t        j                         }t        |d      r|j                  j                  dd      r| S t        d t        j                  | |      D              s| S t        |       }t        |      }t!        t        |             D cg c]  }d }}d||d   <   d}	t#        |dd d      D ]I  \  }
}||
dz
     }||   ||   z  }|t        j$                  kD  r||z  dk7  rt'        ||      |z  }d	}	|||<   K |	s| S t(        xj*                  dz  c_        |S c c}w )
z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   r<  dislike_paddingFc              3  \   K   | ]$  }t        |t        t        j                  f       & y wrq   )rr   rs   r   r   r;  s     rv   r  z&Layout._pad_strides.<locals>.<genexpr>  s(      
 q3./
   *,r5   N)r9  T)r\  r   r6   pad_channels_lastr  rr  r`   get_current_noder=  r<  getr  rd  chainr   r   r   r   padding_stride_thresholdrL   r   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder]  r   r   new_stridespaddedrankr   prev_idxr   s                 rv   _pad_strideszLayout._pad_strides  s    $E*z?a''F,N,N*-
 ,,.?F+0D0D0H0Hu1
   
__Z6
 
 '
3,\:
"'J"89Qq99 &'JqM"":ab>; 	&ID#!$(+H *T(^;F777FUNa<O /%7%K	&  ))Q.)- :s   	E&c                    t        | t              sJ | j                  J | j                  | j                  | j                  | j
                        | _        y rq   )rr   r   r   r  r   r   r[  s    rv   rV  zLayout.pad_strides  sD    $///{{&&&''TYY

Krx   c                F    t         j                  xr t        | t              S rq   )r6   comprehensive_paddingrr   r   r[  s    rv   rU  zLayout.should_pad_strides  s    ++P
40PPrx   c                    t        | t              r| S | j                         r| j                          t        | j                  | j
                  | j                  | j                  | j                        S rq   )	rr   r,  rU  rV  r   r   r   r   r-  r[  s    rv   as_fixedzLayout.as_fixed  sY    dK(K""$KKJJIIKKKK
 	
rx   c                    t         j                  sJ dt        |       j                   d       | j	                         j                         S )Nzconvert z to FixedLayout first)r   r  r   r   r  r  r[  s    rv   r  zLayout.make_indexer)  sG    ,, 	
tDz**++@A	
, }}++--rx   c                   | j                   |j                   k(  xrj | j                  |j                  k(  xrO | j                  |j                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S rq   r   r   r   r   r-  )rU  others     rv   __eq__zLayout.__eq__/  so    KK5<<' ,

ekk),		UZZ', u||+, u||+	
rx   c                X    t        | j                  | j                  | j                        S rq   )r   r   r   r-  r[  s    rv   r`  zLayout.storage_size8  s    .tyy$++t{{SSrx   )r   r  r   r  r   r  r   zOptional[list[Expr]]r-  r   r   r   r  r  )r   torch.Tensorr  )r  r  r0  r  r   r   r  r   z
sympy.Expr)r   r   r   r   r  ro  r  r   rm  rW  r/  rr  ru  rc  r  r  rV  rU  r  r  r  r`  r   rx   rv   r  r  g  s     (,qz## # 	#
 %# # 
#$	
 HG !,>	 !F- 8 8tL
Q
.
Trx   r  c                      e Zd ZdZddZy)r,  z A Tensor layout we cannot changec                      fd}|S )z1A closure containing math to read a given elementc                   t        |       t        j                        k(  sJ t        |       t        j                        k(  sJ j                  }t	        | j                  j                        D ]  \  }}}|dk7  s|||z  z   } |S r~  )r   r   r   r-  r   )r   r  r   r   szrU  s        rv   r  z)FixedLayout.make_indexer.<locals>.indexerB  s    u:T[[!1111u:TYY///[[F#&udkk499#E 3VR7#cFl2F3 Mrx   r   rU  r  s   ` rv   r  zFixedLayout.make_indexer?  s    	 rx   Nr  )r   r   r   r  r  r   rx   rv   r,  r,  <  s
    *rx   r,  c                       e Zd ZdZdZed        Zed        Zed        Zed        Z	ed        Z
ddZdd	Zd
 Zd Zdd fdZ xZS )r   z(A Tensor layout we are allowed to changeFc                    t        |       dk(  rg S t        j                  j                  g}t	        | dd        D ]  }|j                  ||d   z          t        t	        |            S )Nr   r5   r  )r   r   r  r  rt  r/  r   )sizesreversed_stridesr   s      rv   r   z!FlexibleLayout.contiguous_stridesT  sh    u:?I!GGKK=U12Y' 	AD##D+;B+?$?@	AH-.//rx   c                    t        t        t        |                   t        |      k(  s	J | |f       t        j                  j
                  }dgt        |      z  }|D ]  }|||<   || |   z  } |S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        N)r0   r   r   r   r  r  )r  r   next_strider0  r   s        rv   fill_orderedzFlexibleLayout.fill_ordered]  sx     %E
+,
50AAQE5>QAggkk&3u:% 	1A$GAJ%a0K	1 rx   c                    t        t        t        |                   t        |      k(  sJ t        |      }t        j                  | |      S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r0   r   r   r   r   r  )r  r   r   s      rv   r~  zFlexibleLayout.stride_orderedn  sB     %E
+,
50AAAA,U3
**5*==rx   c                >   |t         j                  k(  rt        j                  | t              S |t         j
                  k(  rt        j                  | t              S |t         j                  k(  rt        j                  |       S t        j                  d|       t        )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r   channels_lastr   r~  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr   rE  rF  r  )r  memory_formats     rv    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_formatz  s     E///!008IJJe444!008JKKe555!44U;;IIP &%rx   c                (   t        |       t        |      k(  sJ |D cg c]+  }t        j                  j                  j	                  |      - }}t        t        t        |            |j                        }t        j                  | |      S c c}w )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        r'  )
r   r`   r   r   r  r.  r   __getitem__r   r  )r  r   ru   r   s       rv   same_orderedzFlexibleLayout.same_ordered  sv     5zS[(((BHIQ!''""55a8IIE#f+.F4F4FG
**5*== Js   0Bc                   | j                  | j                  |      }| j                         r)|r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S rq   )r~  r   rU  r  r   r,  r   r-  )rU  r   r  r3  s       rv   as_stride_orderzFlexibleLayout.as_stride_order  sn    ((E:
""$**:tyy$**MJKKJJIIKK
 	
rx   c                    |}| j                         r)|r'| j                  || j                  | j                        }t	        | j
                  | j                  | j                  || j                        S rq   )rU  r  r   r   r,  r   r-  )rU  r  r  r3  s       rv   as_exact_strideszFlexibleLayout.as_exact_strides  s]    "
""$**:tyy$**MJKKJJIIKK
 	
rx   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S rq   )r  r   rU  r  r   r,  r   r-  )rU  r   r3  s      rv   as_fill_orderzFlexibleLayout.as_fill_order  sl    &&tyy%8
""$**:tyy$**MJKKJJIIKK
 	
rx   c                   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                        S rq   )r  r   rU  r  r   r,  r   r-  )rU  r   r3  s      rv   as_same_orderzFlexibleLayout.as_same_order  sl    &&tyy&9
""$**:tyy$**MJKKJJIIKK
 	
rx   c                    |rt         j                  ||      }nt         j                  |      }t        |   ||||       y rq   )r   r  r   ri  r  )rU  r   r   r   r]  r0  rk  s         rv   r  zFlexibleLayout.__init__  s;    $11$EG$77=Gg6rx   r  rq   r  )r   r   r   r  r  r/  r   r  r~  r  r  r  r  r  r  r  r  r  s   @rv   r   r   N  s    2N 0 0    	> 	> & &0 
> 
>





7 7rx   r   c                  2     e Zd ZdZd fdZddZd Z xZS )NonOwningLayoutz,Is a view into the storage of another tensorc                    |j                         }t        | 	  |j                  |j                  |j
                  |j                         || _        y rq   )r   ri  r  r   r   r   r   view)rU  r  r*  rk  s      rv   r  zNonOwningLayout.__init__  sA    "MMLLKKMM		
 	rx   c                >    | j                         j                         S rq   )r  r  r[  s    rv   r  zNonOwningLayout.make_indexer  s    }}++--rx   c                    | j                   j                         j                  }|dk(  ryddlm} t
        j                  j                  j                  ||      S )Nr   Tr5   )	ALIGNMENT)	r  r   r-  utilsr  r`   r   r   statically_known_multiple_of)rU  r-  r  s      rv   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sD    %%'..Q;$ww<<VYOOrx   )r  zUnion[BaseView, TensorBox]r   r   r  )r   r   r   r  r  r  r  r  r  s   @rv   r  r    s    6.Prx   r  c                      e Zd ZdZy)CommBufferTypesymm_memN)r   r   r   SYMM_MEMr   rx   rv   r  r    s    Hrx   r  c                  F     e Zd ZU dZded<   ded<   	 	 	 	 	 	 d fdZ xZS )CommBufferLayoutax  
    A layout that signifies the buffer is a comm buffer.
    In terms of striding, the layout is identical to `FixedLayout`.

    Buffers with this layout do not participate in in-place reuse - it can be
    neither the source nor the target for in-place reuse.

    For detailed motivation and usage of this layout, see
    NOTE [lowering-time collective optimization].
    r  comm_buffer_typer   
group_namec                   t        |t              st        d| d      |j                         }t        |   |j                  |j                  |j                  |j                  |j                         || _        || _        y )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).r  )rr   r   r  r  ri  r  r   r   r   r   r-  r  r  )rU  r*  r  r  fixedrk  s        rv   r  zCommBufferLayout.__init__  s     &.1 ++1("6 
 !<<++<<<< 	 	
 !1$rx   )r*  r   r  r  r  r   )r   r   r   r  r   r  r  r  s   @rv   r  r    s;    	 %$O%% )% 	% %rx   r  c                      e Zd ZU ded<    ej
                  d       Zded<    ej
                  d       Zded<   dd	Zd
 Z	ddZ
y)
NoneLayoutr  r   c                     dgS r  r   r   rx   rv   r&  zNoneLayout.<lambda>)  s     rx   default_factoryr  r   c                     dgS r  r   r   rx   rv   r&  zNoneLayout.<lambda>*  s    1# rx   r   c                     yr  r   r[  s    rv   r`  zNoneLayout.storage_size,  r  rx   c                    | S rq   r   r[  s    rv   r  zNoneLayout.as_fixed/      rx   c                    | j                   S rq   rq  r[  s    rv   r   zNoneLayout.get_device2  rr  rx   Nr&  r  )r   r   r   r   r-  r.  r   r   r`  r  r   r   rx   rv   r  r    sG     #"'k''DD)D)))+FFIFrx   r  c                       e Zd Zd
 fdZedd       Zej                  dd       ZddZddZd Z	e
dd       Zd Zdd	Z xZS )MutationLayoutSHOULDREMOVEc                   t         |   |j                         |j                         |j	                         d        || _        | j                         j                         }t        j                  j                  |       y rq   )ri  r  r  r   r   r  
get_bufferr  r`   r   mark_buffer_mutated)rU  r  r   rk  s      rv   r  z#MutationLayoutSHOULDREMOVE.__init__7  se    &&(OO		
  ))+	##D)rx   c                6    | j                         j                  S rq   )real_layoutr   r[  s    rv   r   z!MutationLayoutSHOULDREMOVE.strideB  s    !(((rx   c                     y rq   r   )rU  r   s     rv   r   z!MutationLayoutSHOULDREMOVE.strideF  s    rx   c                >    | j                         j                         S rq   )r  r`  r[  s    rv   r`  z'MutationLayoutSHOULDREMOVE.storage_sizeJ  s    !..00rx   c                d    fd | j                         }t        |t              sJ d       |S )Nc                    t        | t              r | j                        S t        | t              r | j	                               S t        | t
              r | j                        S | S rq   )rr   r  r  rj  r  
MutableBoxr)  )r  unwrap_viewss    rv   r  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_viewsN  sY    &"<=#FMM22&(+#F$6$6$899&*-#FKK00Mrx   z1MutationLayoutSHOULDREMOVE must refer to a buffer)r  rr   r`  )rU  r  r  s     @rv   r  z%MutationLayoutSHOULDREMOVE.get_bufferM  s9    	 dkk*&&) 	
?	
) rx   c                6    | j                         j                  S rq   )r  r*  r[  s    rv   r  z&MutationLayoutSHOULDREMOVE.real_layout]       '''rx   c                   |j                          t        j                  j                  |j	                                t        |t              r|j                  }|j                          |st        j                  |j                         |j                         |j                         t        |j                         |j                               D cg c]/  \  }}t        j                  j                   j#                  ||      1 c}}      j                  }|j                          t        |j                  j$                  t&              sJ t)        |      |j                  _        |j                  S c c}}w )Nr  )r  r`   r   r  r  rr   rm   r)  r  r  ry  r   r   r  r   r   r   r  r*  r   r  )rz  srcdstunsafe_aliasr  r  s         rv   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into`  s    	
##CLLN3c9%((C 	""~~'mmo* !$CLLNCLLN C1 GG$$11!Q7	 #  d  	#((//>:::4S9xxs   4E6c                    | S rq   r   r[  s    rv   r  z#MutationLayoutSHOULDREMOVE.as_fixed  r  rx   c                6    | j                   j                         S rq   )r  r  r[  s    rv   r  z'MutationLayoutSHOULDREMOVE.make_indexer  r  rx   )r  rn   r   r   r   r  )r   r   r   r   r  )r   r`  r  r  )r   r   r   r  r2  r   setterr`  r  r  r  r  r  r  r  r  s   @rv   r  r  6  sb    	* ) ) ]] 1 (    D*rx   r  c                  ,    e Zd ZU ded<   ded<   d" fdZd#dZd$dZd%dZd&d	Zd'd
Z	e
d(d       Zd)dZd*dZd+dZd,dZd-dZd Zd Zd.d"dZd"dZd"dZ	 d.	 d"dZd Zd/dZd0d1dZd Zd2dZd2dZd3dZ	 d.	 	 	 d4dZd5dZd6d Z d7d!Z! xZ"S )8r`  r  r   r   r*  c                F    t         |           | j                  dd        y r<  )ri  r\  rW  rj  s    rv   r\  zBuffer.__post_init__  s    t4rx   c                >    | j                         j                         S rq   )r   r  r[  s    rv   r  zBuffer.make_indexer  s     --//rx   c                @    | j                   sJ |        | j                   S rq   r_  r[  s    rv   r  zBuffer.get_name  s    yy$yyyrx   c                    t        | j                  t              r| j                  j                         S t	        t        | j                        j                        rq   )rr   r*  r  rm  r  r   r   r[  s    rv   rm  zBuffer.get_example  s=    dkk6*;;**,,!$t{{"3"<"<==rx   c                >    | j                         j                         S rq   )r  r   r[  s    rv   r   zBuffer.get_device  s    ##%0022rx   c                     y rq   r   r[  s    rv   ro  zBuffer.get_defining_op  rp  rx   c                6    | j                         j                  S rq   )r   r   r[  s    rv   r   zBuffer.dtype  s     &&&rx   c                :    g | j                         j                  S rq   )r   r   r[  s    rv   r   zBuffer.get_size  s    ("''((rx   c                :    g | j                         j                  S rq   )r   r   r[  s    rv   r  zBuffer.get_stride  s    *"))**rx   c                6    | j                         j                  S rq   )r   r-  r[  s    rv   
get_offsetzBuffer.get_offset  r  rx   c                    t        | j                  t              r| j                  S t        t	        | j                        j
                        rq   )rr   r*  r  r  r   r   r[  s    rv   r   zBuffer.get_layout  s4    dkk6*;;!$t{{"3"<"<==rx   c                    | j                   S rq   r  r[  s    rv   r  zBuffer.get_output_spec  rr  rx   c                "    | j                         S rq   )r  r[  s    rv   r  zBuffer.get_storage_numel  s    ~~rx   c                    t        | j                  t              r;t        | j                  t              s | j                  j	                         | _        y y y rq   )rr   r*  r  r  r  r[  s    rv   r  zBuffer.freeze_layout  s>    dkk6*:KK4
 ++..0DK4
*rx   c                    t        | j                  t              sJ | j                  j                  ||      | _        y Nr^  )rr   r*  r   r  r  s      rv   r  z&Buffer.freeze_layout_with_stride_order  s1    $++~666kk11%}1Urx   c                |    t        | j                  t              sJ | j                  j                  |      | _        y rq   )rr   r*  r   r  r  s     rv   r  z$Buffer.freeze_layout_with_fill_order  s,    $++~666kk//6rx   c                |    t        | j                  t              sJ | j                  j                  |      | _        y rq   )rr   r*  r   r  r  s     rv   r  z$Buffer.freeze_layout_with_same_order  s,    $++~666kk//7rx   c                    t        | j                  t              sJ | j                  j                  ||      | _        y r
  )rr   r*  r   r  r  s      rv   r  z'Buffer.freeze_layout_with_exact_strides  s8     $++~666kk22 3 
rx   c                    t         j                  j                  j                  t	        j
                  | j                         d            S r  r  r[  s    rv   r  zBuffer.is_zero_elements  r  rx   c                p      j                         rt        t         j                               S  fd}|S )Nr  c                x    j                         }t        j                  j                  xs d ||             S r  )r  r^   r  r   r   r  rU  s     rv   r  z"Buffer.make_loader.<locals>.loader  s/    '')G88DII2GENCCrx   )r  r
   r  r   r	  s   ` rv   r  zBuffer.make_loader  s0      "=0@AA	D rx   c                "    | j                         S rq   r  r  s     rv   r  zBuffer.codegen_reference  r  rx   c                     y rq   r   r[  s    rv   r1  zBuffer.decide_layout  r  rx   c                    t        | j                  t              r%| j                  j                  j	                         gS yrC  )rr   r*  r  r  r  r[  s    rv   r  z#Buffer.get_inputs_that_alias_output  s/    dkk?3KK$$--/00rx   c                    t        | j                  t              r%| j                  j                  j	                         gS yrC  )rr   r*  r  r  r  r[  s    rv   r  zBuffer.get_mutation_names  s0    dkk#=>KK&&//122rx   c                6    t        | j                         g      S rq   )r0   r  r[  s    rv   rf  zBuffer.get_read_names  s    4==?+,,rx   c                    t               S rq   r/   r  s     rv   r  zBuffer.get_free_symbol_uses       |rx   c                    t               S rq   r/   r[  s    rv   rL  zBuffer.get_unbacked_symbol_defs  rM  rx   c                     y rq   r   r[  s    rv   r  zBuffer.realize  r  rx   c                     yr  r   r[  s    rv   should_allocatezBuffer.should_allocate  s    rx   r  r  r  )r   z!Union[torch.Tensor, sympy.Symbol]r  r  r  r  r  r
  r  r  r  r  rq   r  r,  r  r(  rS  r  r  )#r   r   r   r   r\  r  r  rm  r   ro  r2  r   r   r  r  r   r  r  r  r  r  r  r  r  r  r  r1  r  r  rf  r  rL  r  r  r  r  s   @rv   r`  r`    s     
50>
3 ' ')+(>
 1V78
 ,1
	
U	

- %*!	!
rx   r`  c                  <    e Zd ZddZddZej                  ZddZy)OperationBufferc                    | gS rq   r   r[  s    rv   rJ  zOperationBuffer.get_outputs  s	    vrx   c                    | S rq   r   r[  s    rv   ro  zOperationBuffer.get_defining_op  r  rx   c                X    t         j                  |        t        j                  |        y rq   )r`  r\  r4  r[  s    rv   r\  zOperationBuffer.__post_init__  s    T"%rx   NrQ  r   r4  r  )r   r   r   rJ  ro  r4  r  r\  r   rx   rv   r  r    s     #55&rx   r  c                      e Zd ZddZy)InputBufferc                     yr~  r   r[  s    rv   r  zInputBuffer.num_reads  r  rx   Nr&  )r   r   r   r  r   rx   rv   r%  r%    s    rx   r%  c                      e Zd ZdZy)DonatedBufferaY  
    Represents a donated buffer which is a saved tensor that is not alias to any
    fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
    reuse the input tensor memory during backward since it might be used in another
    function. However, donated buffer can be inplace reused during backward
    to save memory.
    N)r   r   r   r  r   rx   rv   r(  r(  #  s    rx   r(  c                  ,    e Zd ZU dZded<   ddZddZy)r  Nr  r  c                     d fd}|S )Nc                    j                         j                         }t        j                  t        j
                  j                  j                         j                         ||             S rq   )	r   r  r^   r  r`   r   constant_namer  r  r  s     rv   r  z*ConstantBuffer.make_loader.<locals>.loader1  sP    oo'446G88%%dmmot7K7KL rx   r  r   r	  s   ` rv   r  zConstantBuffer.make_loader0  s    	 rx   c                    t        t        j                  j                  | j	                         |      | j
                        S N)r   r*  )r  r`   r   r,  r  r*  r  s     rv   r  z!ConstantBuffer.constant_to_device:  s/    &&t}}?
 	
rx   r  r+  )r   r   r   r  r   r  r  r   rx   rv   r  r  -  s    .2O+2
rx   r  c                  @    e Zd ZddZ	 d	 	 	 d	dZd
ddZddZddZy)NoneAsConstantBufferc                    t               S rq   r/   r[  s    rv   re  zNoneAsConstantBuffer.get_readsB  rM  rx   c                    t               S rq   r/   r  s     rv   r  z)NoneAsConstantBuffer.get_free_symbol_usesE  r  rx   Nc                J    t         j                  j                  j                  S rq   )r`   r   r  none_strr  s     rv   r  z&NoneAsConstantBuffer.codegen_referenceJ  s    ww##,,,rx   c                    t        d       S Nrq  )r  r[  s    rv   r  z$NoneAsConstantBuffer.get_output_specM  s    &&rx   c                     yr  r   r[  s    rv   r  z&NoneAsConstantBuffer.has_tensor_outputP  r  rx   r%  r  r(  rq   r  r  r  )r   r   r   re  r  r  r  r  r   rx   rv   r0  r0  @  s0     %*!	!
-'rx   r0  c                  <    e Zd ZU ded<   	 d	 	 	 ddZd	d
dZddZy)r   r   r  c                .    t        | j                  |      S rq   )r   r  r  s     rv   r  z*ShapeAsConstantBuffer.get_free_symbol_usesX  s      		=99rx   Nc                h    t         j                  j                  j                  | j                        S rq   )r`   r   r  codegen_sizevarr  r  s     rv   r  z'ShapeAsConstantBuffer.codegen_reference]  s!    ww##33DII>>rx   c                     yr  r   r[  s    rv   r  z'ShapeAsConstantBuffer.has_tensor_output`  r  rx   r  r(  rq   r  r  )r   r   r   r   r  r  r  r   rx   rv   r   r   T  s+    
J %*:!:	!:
?rx   r   c                       e Zd ZU ded<   ddZddZddZddZddZ	 d	 	 	 ddZ	d fd	Z
dd
ZddZddZe	 	 d d       Z	 	 d!	 	 	 	 	 d"dZe	 d#d       Zd$dZddZd%dZd%dZd&dZ xZS )'r+  rU  r)  c                    | j                   | j                   S t        | j                  d      r| j                  j                   S y)z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r=  r)  r[  s    rv   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_nameh  s7    
 99 99499f%99>>!rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  zComputedBuffer.num_readss  r  rx   c                6    | j                   j                         S rq   r)  re  r[  s    rv   re  zComputedBuffer.get_readsv  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   rf  zComputedBuffer.get_read_namesy  r  rx   c                   t        j                  t        dd      5  | j                  j	                         rTt        | j                         | j                  j                         | j                  j                               cd d d        S t        | j                         | j                  j                               cd d d        S # 1 sw Y   y xY wr  )
r   r   r   r)  r  r?   get_store_functionrv  r  r   r[  s    rv   r  zComputedBuffer.get_read_writes|  s    \\.*:DA 	yy++-*++-II002II002	 	 +++-II&&(	 	 	s   A%C1CCc                    t        | j                         |      t        | j                         |      z  t        | j                         |      z  | j                  j                  |      z  S rq   )r   r   r  r  r)  r  r  s     rv   r  z#ComputedBuffer.get_free_symbol_uses  s`    * T]]_m<t0-@At0-@A ii,,];<	
rx   c                    | j                         sS| j                  t        j                  j                  vr-| j                         dk(  r| j                  j                         S t        |          S r  )	r  r   r`   r   mutated_buffersr  r)  r  ri  rj  s    rv   r  zComputedBuffer.make_loader  sW    '')		!8!88 A% 99((**w"$$rx   c                   | j                         j                         j                         }t        | j                  t
        t        t        f      r+t        | j                  j                  | j                  |      S t        | j                  t              sJ t        | j                  j                  | j                  |      S rq   )r   r  r  rr   r)  r  r  r6  r
   r  r   r  r  r  s     rv   rF  z!ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!8949944diiIIdii33349911499gFFrx   c                P   t        | j                  t              r{t        j                  | j
                  j                         | j
                  j                               \  \  }}}| j                         j                  }t        d |D              sJ |D cg c]_  }t        |t        j                        rCt        |j                  |D ci c]#  }|dk7  s	|t        j                  j                   % c}      a }}}|rt        | j
                  t"        t$        f      r| j
                  j'                  ||      }n|}|D cg c],  }t(        j*                  j,                  j/                  ||      . }	}ddlm}
  |
|	| j5                               S yc c}w c c}}w c c}w )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  p   K   | ].  }t        |t        j                  t        j                  f       0 y wrq   )rr   r7   StarDep	MemoryDepr  s     rv   r  z0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s0       1|33\5K5KLMs   46r   r5   pick_loop_orderN)rr   r*  r   r7   rG  r)  rv  r  r  r  r  rN  r\   r   r   r  r  r  r6  r   r`   r   r   rI  	schedulerrP  r   )rU  
index_varsr  r   r  r|  vr4  r  stride_lengthsrP  s              rv   r   zComputedBuffer.get_fill_order  sj    dkk>2.:.M.M		,,.		0L0L0N/+(Z! ((*00E      a!7!78 177n$WPQUVPVQ_$WXE  dii$6"ii//
NKG(GMR"EIAGG$$11$@" " 7&~t}}GG# %X"s$   3F
FF6	F1F#Fc                    t        | j                  t              r5| j                         }|r| j	                  |       y | j                          y y rq   )rr   r*  r   r   r  r  r  s     rv   r1  zComputedBuffer.decide_layout  s@    dkk>2'')E2259""$ 3rx   c                z   t        j                  | j                  j                         | j                  j	                         d      \  }}t        j                  t        d| j                               5  t        | j                         | j                         r|n|d d |g| }d d d        g }g }g }g }|j                         D ]^  \  }}	||d   v r'|rJ |j                  |       |j                  |	       4||d   v sJ |j                  |       |j                  |	       ` ||f||ffS # 1 sw Y   xY w)Nqrk   r  r5   r   )r7   rG  r)  rv  r  r   r   r  r   rA   rF  r  itemsr/  )
rU  r   
var_rangesr  rR  reduce_vars
index_sizereduce_sizerS  r   s
             rv   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_body  sI    (::II((*DII,H,H,JSV
j \\.*;T__=NO 	'')002Ra 	D	 
!#
$$& 	&DAqDG|&&!!!$!!!$DG|#|""1%""1%	& K($[0III)	 	s   52D11D:c                     j                         \  \  }}}\  }}|r |||f|||f      \  \  }}}\  }}g |j                  j                         |t        |t              rt        |      dk(  sJ |\  }}	t        |t              sJ t        |	t              sJ t        d |	D              sJ |j                  }
|
|k(  s	J |
|f       |	D cg c]	  }|vs| }	}|	z  g |j                         t        j                  j                   t        j                        sj!                  |j#                                 fd}||z   }t%        t'                      xs t(        j*                   } |||||      \  }}} |||||      \  }}}t-        j.                  ||d      \  \  }}}t1        | ||       ||      g|||      }||f|fS c c}w )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        r   c              3  <   K   | ]  }t        |t                y wrq   )rr   r   )r  fs     rv   r  z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>5  s     Hqz!T*HrW  c           	         j                  | ||
      \  }}} ||       } |rGt        j                  j                  j	                  | |t        	| |            \  }}}t        ||      }n|}|||fS rq   )_apply_loop_reorderingr`   r   r   _simplify_loopsr;   r   )x_varssupport_varsr  simplify_loopsreindex0r   r   _pruner   index_formulasmemory_addrsrU  s            rv   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorderF  s    (,(C(Ce\)%E8X f%F*+''*:*:*J*J,^VUK+'x
 *(H="'8++rx   prk   )r]  indexing_exprsr   rr   r   r   r   r   r  rY  get_write_exprsr`   r   r=  r8   PREFER_STORE_LOOP_ORDERextendget_read_exprsrW   r   r6   loop_ordering_after_fusionr7   index_vars_no_squeezerA   )rU  extra_indexing_constraintsrecompute_sizes_body_funcr[  r\  r  rR  rZ  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr]  rk  re  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsrY  ri  rj  s   `                     @@rv   rk  z#ComputedBuffer.simplify_and_reorder  s`   4 '')		
%Z%Z %
 *[)4*k1J	)[)[
 94..5578%15u=23q89 :T6!#63T:::14888H4GHHHH"&//&*?? #%B ? /#!>2I# # 11N0--/0ww""4)O)OP 3 3 56	,$ "K/t,--VV5V5V1V 	 (<	(
$\1 ,@{4F,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11w#s   	GGc           
     X   ddl m} |g }	 |D cg c]-  }t        j                  j                  j                  || |      / }}t        |      t        |      k(  rt        |d         t        |       k(  sJ t        t         ||||                  }|D 	cg c]  }	||	   	 }}	|t#        |      t%        |      fS c c}w # t        $ rZ t        j                  r*t        j                  dt        t        | |            |       t        t!        t        |                  }Y w xY wc c}	w )zU
        Shuffle the order of loops around to hopefully improve performance.
        r5   rO  r   z%Did not simplify complex index:
%s
%s)rQ  rP  r`   r   r   rI  r   r   rt  	Exceptionr6   rF  rE  warningr   r   r   r   r   )
rR  re  r  rj  priority_idxrP  r  r0  r   r   s
             rv   rb  z%ComputedBuffer._apply_loop_reorderingz  s'    	/L	, )   --dJMG  w<3|#44WQZCM :   /'5,"OPQE $))aq))l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 2B<AC D'<C A D$#D$c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  z!ComputedBuffer.get_reduction_size      yy++--rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r  z!ComputedBuffer.get_reduction_type  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zComputedBuffer.is_no_op  r  rx   c                     yNTr   r[  s    rv   r  zComputedBuffer.should_allocate  rp  rx   c                8    | j                   j                  |      S )r  r)  r  r  s     rv   r  z!ComputedBuffer.constant_to_device  s    yy++F33rx   r  r&  r%  r  r$  r  r(  r  )r   zCallable[..., None])r   zOptional[list[int]]r  )r   zetuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody, tuple[list[sympy.Expr], list[sympy.Expr]]]NN)rt  *Optional[tuple[dict[Any, Any], list[Any]]]ru  Optional[Callable[..., Any]]r   z:tuple[tuple[list[sympy.Expr], list[sympy.Expr]], LoopBody]rq   r)  r  r+  )r   r   r   r   r?  r  re  rf  r  r  r  rF  r   r1  rK   r]  rk  r/  rb  r  r  r  r  r  r  r  s   @rv   r+  r+  d  s    
K	%%* %*
!
	!
6%G%N% J
J JD RVBFq2$Nq2 $@q2 
D	q2f  !B !BF..,4rx   r+  c                  n     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fdZd
dZd ZddZddZddZ		 	 d	 	 	 ddZ
 xZS )TemplateBufferzt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t         |   d |       t        j                  |      | _        || _        t        j                  j                  |       | _	        t        j                  j                  |        y r.  )ri  r  InputsKernelunwrap_storagerC  make_kernel_renderr`   r   register_bufferr   register_operation)rU  r*  rC  r  rk  s       rv   r  zTemplateBuffer.__init__  sY     	d62"11&9"4GG++D1		""4(rx   c                &    | j                  d      S )NT	normalize)r?   r[  s    rv   r  zTemplateBuffer.get_read_writes  s    ''$'77rx   c           	        | j                         | j                         j                         fd}t        j                  || j                         d|      }| j                  D ]f  j                  j                         fd}|xj                  t        j                  |j                         dd      j                  z  c_        h |S )Nc                ^    t        |      dk(  sJ t        j                   |       d      S )Nr   fake)r   r^   r  )r   r  r  r   s     rv   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummy  s,    v;!###99T75>6::rx   r   r  c                z    t        |      dk(  sJ t        j                  j                          |              y r  )r   r^   r  r  )r   r  r  rE  s     rv   r  z1TemplateBuffer.extract_read_writes.<locals>.dummy  s-    6{a'''8rx   T)	r  r   r  r7   r?   r   rC  r*  r  )rU  r  r  depsr  rE  r   s       @@@rv   r?   z"TemplateBuffer.extract_read_writes  s    }}//#002	; //4==?B)
 ;; 		Cjj--/G9 JJ,::s||~rTeJ		 rx   c                6    t         j                  j                  S rq   )r   r  r  r[  s    rv   r  z!TemplateBuffer.get_reduction_size  s    ww{{rx   c                     y rq   r   r[  s    rv   r  z!TemplateBuffer.get_reduction_type  rp  rx   c                     yr  r   r[  s    rv   r  zTemplateBuffer.should_allocate  rp  rx   c                *    | j                         dfd fS rC  r  )rU  rt  ru  s      rv   rk  z#TemplateBuffer.simplify_and_reorder  s$      
 	
rx   )r*  r  rC  Sequence[IRNode]r  rV  r   r   r$  r)  r  r  r  )rt  r  ru  r  )r   r   r   r  r  r  r?   r  r  r  rk  r  r  s   @rv   r  r    sn    

)
) !
) /	
)
 

)82
 RVBF
$N
 $@
rx   r  c                  ^     e Zd Z	 	 d	 	 	 	 	 d fdZ	 d	 	 	 d	 fdZd
dZddZddZ xZS )TritonTemplateBufferc           
     :   t         
|   |||       || _        | g| _        |t        j
                  j                  j                  t        j
                  j                  j                  f}t        j                  j                  j                  }||v sJ d| d|        | j                  d   j                         }| xj                  |D 	cg c]  }	t        t!        |      |	|        c}	z  c_        |r|n	t#               | _        d| _        d| _        yc c}	w )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   rq  )ri  r  mutated_inputsoutputsr   r^   higher_orderflex_attentionflex_attention_backwardr`   r   current_noder  rC  r   MutationOutputr  r0   allowed_prologue_inpssubgraph_inpssubgraph_outs)rU  r*  rC  r  r  r  allowed_setr  r   r6  rk  s             rv   r  zTritonTemplateBuffer.__init__  s   " 	);<,&*V% 		&&55		&&>>K 77//66L;. 6{m9\N[. [[^..0FLL) z8#tD L &;!
 	" SW?Cs   Dc                   t         |   |      }| j                  r| j                  ng }| j                  r| j                  ng }|D ]m  }t	        |t
        j                        r|j                  t        ||             9t	        |t              r!|j                  |j                  |             j|mJ  |D ]7  }t	        |t              r!|j                  |j                  |             4|7J  |S rq   )
ri  r  r  r  rr   r   r   updater   rn   )rU  r   resr  r  rE  r   rk  s          rv   r  z)TritonTemplateBuffer.get_free_symbol_uses   s     g*=9.2.@.@**b.2.@.@**b  	#C#uzz*

+C?@C(

333MBC{"{	# ! 	#C#v&

333MBC{"{		# 
rx   c                    | j                   S rq   )r  r[  s    rv   rJ  z TritonTemplateBuffer.get_outputs7  r  rx   c                    | j                   S rq   )r  r[  s    rv   get_allowed_prologue_inpsz.TritonTemplateBuffer.get_allowed_prologue_inps:  s    )))rx   c                &    d| j                    d}|S )NzTritonTemplateBuffer(layout=r   r  )rU  r   s     rv   ro  zTritonTemplateBuffer.__str__=  s    ,T[[M;
rx   r  )r  zOptional[Iterable[IRNode]]r  zOptional[OrderedSet[str]]r   r   r  r(  rQ  r  r  )	r   r   r   r  r  rJ  r  ro  r  r  s   @rv   r  r    s\     6:;?)D
 3)D  9)D 
)DX %*!	!.*rx   r  c                  v     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d fdZddZddZd ZddZddZ	ddZ
dd	Zdd
Z xZS )ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    c                Z    t         |           || _        || _        || _        || _        y rq   )ri  r  r   r*  r~   description)rU  r   r~   r*  r  rk  s        rv   r  zChoiceCaller.__init__N  s0     		& 'rx   c                   | j                         t        j                  rt        fd      S t	        j
                  d|i      S )Nc                        S rq   r   )algor   s   rv   r&  z(ChoiceCaller.benchmark.<locals>.<lambda>`  s    D$K rx   r   )to_callabler6   /profile_bandwidth_with_do_bench_using_profilingrP   rF   	benchmark)rU  r   r   r  s     `@rv   r  zChoiceCaller.benchmark]  s?    !AA+,?@@$$T4%>>rx   c                    t         rq   r9  r[  s    rv   	call_namezChoiceCaller.call_namec  r:  rx   c                    t         rq   r9  r[  s    rv   r  zChoiceCaller.to_callablef  r:  rx   c                "    | j                         S )z
        Hash key for the underlying kernel. By default, we assume there are no
        runtime params, so kernel hash key defaults to choice caller's hash key.
        )hash_keyr[  s    rv   kernel_hash_keyzChoiceCaller.kernel_hash_keyi  s    
 }}rx   c                    t         rq   r9  r[  s    rv   r  zChoiceCaller.hash_keyp  r:  rx   c                    t         rq   r9  r[  s    rv   r?  zChoiceCaller.output_nodes  r:  rx   c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.r   r[  s    rv   	info_dictzChoiceCaller.info_dictv  s    	rx   c                     y)Nunsupported_choicer   r[  s    rv   autoheuristic_idzChoiceCaller.autoheuristic_idz  s    #rx   )
r   r   r~   rR  r*  r  r  r   r   r   )r   r  r  )r   rm   )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]])r   r   r   r  r  r  r  r  r  r  r?  r  r  r  r  s   @rv   r  r  E  se    '' "' 	'
 ' 
'?""""$rx   r  c                      e Zd ZddZy)TritonTemplateCallerBasec                    t         rq   r9  r[  s    rv   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  r:  rx   N)r   r   )r   r   r   r  r   rx   rv   r  r  ~  s    "rx   r  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d fdZed	d       Zed
d       Zej                  dd       Z
ddZddZ xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    c                    t         |   ||d |       || _        d | _        || _        t        d |D              | _        y )N)r*  rC  r  r  c              3     K   | ]R  }t        |t              xs< t        |t        j                  j                  j
                        xr |j                   T y wrq   )rr   r  r   	_inductorselect_algorithmExternKernelCallerhas_out_variant)r  choices     rv   r  z/MultiTemplateBuffer.__init__.<locals>.<genexpr>  sT      %
  v78 65??#C#C#V#VW +**%
s   AA)ri  r  _choice_timings_fn_choice_timingsoriginal_inputsr  _output_plannable)rU  r*  rC  choice_timings_fnunfiltered_choicesr  rk  s         rv   r  zMultiTemplateBuffer.__init__  sY     	#"7	 	 	
 #4DH%!$ %
 -%
 "
rx   c                    | j                   S )z^
        Are all possible choices TritonTemplates or Extern Kernels with out variants
        )r  r[  s    rv   output_plannablez$MultiTemplateBuffer.output_plannable  s    
 %%%rx   c                \    | j                   | j                         | _         | j                   S rq   )r  r  r[  s    rv   choice_timingsz"MultiTemplateBuffer.choice_timings  s+    '#'#:#:#<D ###rx   c              #    K   t        |t        j                  j                  j                        sJ | j
                  |j
                  k(  sJ | j                  }|j                         | _        	 d  || _        y # || _        w xY wwrq   )rr   r   r  r  TritonTemplateCallerr*  r  r  )rU  callerrenders      rv   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  sp     &%//"B"B"W"WXXX{{fmm+++(("("?"?"A	-&,D#fD#s   A-B0A< 4B<	BBc                2   t        |t        j                  j                  j                        sJ | j                         |j                  j                  k(  sJ | j                         |j                  j                  k(  sJ |j                         | _        y rq   )rr   r   r  r  r  r   r*  r   r  r   r  r  )rU  r  s     rv   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller  sp    &%//"B"B"W"WXXX}}&--"4"4444 FMM$8$8888"("?"?"Arx   c                z    t        | j                  | j                  j                        }|| j                  |   fS )Nr'  )r  r  r  )rU  
min_choices     rv   get_min_choicez"MultiTemplateBuffer.get_min_choice  s6    ,,$2E2E2I2IJ
D//
;<<rx   )r*  r  rC  r   r  z'Callable[[], dict[ChoiceCaller, float]]r  zlist[ChoiceCaller]r  r  r   r   r  )r   zdict[ChoiceCaller, float])r  r  )r  r  r   r   )r   ztuple[ChoiceCaller, float])r   r   r   r  r  r2  r  r  r0  r1  r  r  r  r  r  s   @rv   r  r    s    

 
 C	

 /
  /
 

4 & & $ $
 	- 	-B=rx   r  c                  >     e Zd Z	 	 	 	 	 	 	 	 d fdZd ZddZ xZS )CUDATemplateBufferc                R    t         |   |||       || _        || _        || _        y rq   )ri  r  workspace_sizetemplatesupports_epilogue_fusion)rU  r*  rC  r  r  r  r  rk  s          rv   r  zCUDATemplateBuffer.__init__  s.     	);<, (@%rx   c                6    | j                   | j                   S dS r  )r  r[  s    rv   rP  z%CUDATemplateBuffer.get_workspace_size  s    &*&9&9&Et""L1Lrx   c                x    | j                         D ]'  }t        j                  |j                         d d        ) y rq   )rJ  r^   r  r  )rU  r7  s     rv   emulate_store_fnz#CUDATemplateBuffer.emulate_store_fn  s1    &&( 	5FIIfoo't4	5rx   )r  rs   r  rc   r  r   r   r   r  )r   r   r   r  rP  r  r  r  s   @rv   r  r    s@    A
 A A #'A 
AM5rx   r  c                  ,     e Zd Zd fdZd fdZ xZS )CppTemplateBufferc                R    t         |   |||       || _        || _        d | _        y rq   )ri  r  r  r  r  )rU  r*  rC  r  r  r  rk  s         rv   r  zCppTemplateBuffer.__init__  s*    );< /3rx   c                   t        | j                  t              r]t        | j                  t              sJ | j                  d   }t        |t
              sJ |j                  }t        |t              sJ |S t        | !         S r  )	rr   r*  MultiOutputLayoutr  r   r`  r  ri  r   )rU  first_outputr*  rk  s      rv   r   zCppTemplateBuffer.get_layout  sq    dkk#45dllH555<<?LlF333!((Fff---M7%''rx   r  r  )r   r   r   r  r   r  r  s   @rv   r  r    s    4	( 	(rx   r  c                  Z    e Zd ZU ded<   d
dZddZedd       Zed        Z	ddZ
ddZy	)r  rR  rC  c                   t        t        j                            }t        j                  | j                  D ]c  }t        |t              r|j                  fd|D               .t        |t              r?|j                   |j                                      e t        t        j                     fd| j                         D              }t        j                  ||t                     S )Nc              3  J   K   | ]  } |j                                 y wrq   r  )r  ru   rM  s     rv   r  z/InputsKernel.get_read_writes.<locals>.<genexpr>  s     BqWQZZ\2B    #c              3  J   K   | ]  } |j                                 y wrq   r  )r  r6  rM  s     rv   r  z/InputsKernel.get_read_writes.<locals>.<genexpr>  s!      .
(+GCLLN#.
r  )r  writesindex_exprs)r0   r7   r<   rM  rC  rr   r   r  r   r  r  rJ  
ReadWrites)rU  r  inputr  rM  s       @rv   r  zInputsKernel.get_read_writes  s    <++,.&&[[ 	5E%&BEBBE#89		'%.."234	5 L,,- .
/3/?/?/A.
 
 &&"
 	
rx   c                6    | j                         j                  S rq   r  r[  s    rv   re  zInputsKernel.get_reads  r  rx   c                   t        |t              r|j                  }t        |t              r|j                  }t        |t              r%t        |t
              st        j                  |      }t        |t              r| j                  |      S t        |t              r|S t        |t        t
        f      sJ |       |S rq   )rr   rm   r)  r_  rj  r.  r  realize_inputunwrap_storage_for_inputTorchBindObjectr`  rz  ru   s     rv   r  z%InputsKernel.unwrap_storage_for_input  s    a#Aa$Aa":a+I**1-Aa#
 //22a)H!fo67::7rx   c                    g }| D ][  }t        |t              r#|D cg c]  }t        j                  |       }}nt        j                  |      }|j	                  |       ] |S c c}w rq   )rr   r   r  r  r/  )rC  
inputs_newru   r   s       rv   r  zInputsKernel.unwrap_storage%  sj    
 	!A!T"GHI!\::1=II 99!<a 	! 	 Js   A%c                     yr  r   r[  s    rv   r  zInputsKernel.is_extern0  rp  rx   c                     yr~  r   r[  s    rv   r  zInputsKernel.num_reads3  r  rx   Nr$  r%  )ru   rn   r   rn   r  r&  )r   r   r   r   r  re  r  r  r/  r  r  r  r   rx   rv   r  r    sD    
,,  $  rx   r  c                      e Zd ZddZddZy)	NopKernelc                     yr  r   r[  s    rv   r  zNopKernel.is_no_op8  rp  rx   c                    t               S rq   r/   r[  s    rv   re  zNopKernel.get_reads;  rM  rx   Nr  r%  )r   r   r   r  re  r   rx   rv   r  r  7  s    rx   r  c                  J    e Zd ZdZed        Zedd       Zed        ZddZy)	ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                ,	   |d   j                         }|d   j                         }t        |d   j                               }dg}||   g}d|cxk  rt	        |      k  sJ  J t        dt	        |            D ]  }||   j                         }	|j                  ||          t	        |	      t	        |      k(  sJ ||   j                         |k(  sJ ||   j                         |k(  sJ t        t	        |            D ]I  }
|
|k(  r||
   |	|
   z   ||
<   t        j                  j                  j                  ||
   |	|
         ||
<   K |j                  ||           t        j                  |      }t        j                  r$t        j!                  |||d   j"                        }t        t	        |            D ]k  }||   }t%        |      s|j'                         }t)        |t*              s5t        j-                  |j.                  |j0                        s`t3        |      } n t5        d |D              }t        j                  j6                  j8                  d   }t)        |t              sJ |du rt5        d |D              rt3        |      }t;        d t+        ||||      g       }t=        |      }g }t        t	        |            D ]  }| j?                  ||   t@        jC                  ||||   ||   d            }|jD                  j                  |       t)        ||   jF                  tH              r||   jF                  jK                         }n||   jF                  }|jM                         stO        ||   j                         jP                        stS        |      r|j                  |jU                                 t	        |      dkD  rMt        j                  jW                  |tX        jZ                        rt        j                  j]                  |       t        j                  j_                  |      |_0        | jc                  |jD                        |_"        t        j                  je                  |       |S )	Nr   r5   c              3  2   K   | ]  }t        |        y wrq   )r   r'  s     rv   r  z&ConcatKernel.create.<locals>.<genexpr>n  s     -W1.CA.F-Wr(  Fc              3     K   | ]p  }d |j                   v xr\ |j                   d    j                  t        j                        xs- |j                   d    j                  t        j                         r yw)ru  r  N)r<  rW  r   r  r  r  args     rv   r  z&ConcatKernel.create.<locals>.<genexpr>r  sq      <
  SXX --E<O<O-P W88E?00u?U?U0V<
s   A6A8)r   r   r   r   r   r*  rC  )r1  )3r   r   r   r   r   r   r/  r`   r   r   r  r   r   r6   r  r  r  r   r   r   rr   r,  rr  r   r   r"   r  r  r   r  r_  r  r$  ry  rC  r)  rj  r  r  rW   r   rV   r  r=  r8   FOREACHregister_operation_listr  r   r  r  )rz  rC  r  r   r   r  offsets_startoffsets_endr   
input_sizer[  output_strideru   r*  any_input_is_storage_and_layoutfx_node_argsconcat_kernelkernelop_namesinput_bufferinput_unwrappeds                        rv   ry  zConcatKernel.createE  s   %%'q	##%q	**,-}oC'#h-'''''q#f+& 	.A++-J  #/z?c(m333!9&&(E111!9'')V3333x=) 8"*1+
1"=HQK"#''"2"2"?"? Z]#HQK	 x}-	. '99(C''"//xM
 s6{# 		Aq	A$Q'K88fmmT$B8$LM		 +.-WPV-W*W'ww++003,---*e3 <
 $<
 9
 ;8DM$$	 	
 M*s6{# 	CA++q	  Cq!1;q> ! L   ''5&)..(3"().."<"<">"()..  //16!9//1667"<0 ? ? AB'	C* x=1!4!4V^=S=S!TGG++H5WW44]C"11-2F2FG	""=1rx   Nc                d   t        |t              r| j                  |j                  |      S t        |j                  t              rt        |j                  j
                  t              r|j                  j                  sy|yt        |j                               t        |j                               k(  syt        d t        |j                         |j                               D              S t        |j                  j
                  t              xr t        |j                  t               S )NFTc              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wrq   r%  r&  s      rv   r  z=ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s3      B   88R@r'  )rr   rm   can_realize_into_without_copyr)  r  r*  r,  r  r   r  r  r   r   ExternKernelAlloc)rz  r  r  s      rv   r1  z*ConcatKernel.can_realize_into_without_copy  s    c9%44SXXsCCchh 34sxx<xx00 { s~~'(C0@,AA !#.."2CNN4DE  
 #((//>: 
:HH'D
 @
 	
rx   c                L   t        |t              s&t        |      rt        |      \  }}t        ||      }t        |t              sJ |       t        |t              r| j                  |j                  |      S t        |t              r`|j                          t        |j                  d      sJ | j                  ||      r&t        |      |j                  _        |j                  S t        j                  |j                         |j!                         |j#                         t%        |j'                         |j'                               D cg c]/  \  }}t(        j*                  j,                  j/                  ||      1 c}}      }| j                  ||      S c c}}w )Nr(  r*  r  )rr   r.  r   r+  rm   r  r)  r_  r  r=  r1  r  r*  r  ry  r   r   r  r   r   r`   r   r   r  )rz  r  r  r1  r*  r  r  pws           rv   r  zConcatKernel.realize_into  sH   
 #/$S)"7"<%76B#/44/c9%##CHHc22c:&KKM388X...00c:"1#"6xx>>#--/__&  ?Aq   --a3	  
 C((s   4F c                     yr  r   r[  s    rv   r  zConcatKernel.should_allocate  rp  rx   rq   r  )	r   r   r   r  r  ry  r1  r  r  r   rx   rv   r  r  ?  sL    
 ^ ^@ 
 
< ) )@rx   r  c                      e Zd ZU dZded<    ej                  e      Zded<   dZ	ded	<   dZ
d
ed<   dZd
ed<    ej                  e      Zded<   dZded<   dZded<   dZded<    ej                  e      Zded<    ej                  e      Zded<   	 	 	 	 	 	 	 d<	 d= fdZd>dZd?dZd Zd Zd=dZd Zd@dAd ZdBd!Zd" Zed#        Ze	 	 dCd$       Z ed%        Z!ed&        Z"ed'        Z#e	 	 	 dD	 	 	 dEd(       Z$edFd)       Z%edFd*       Z&ed+        Z'ed,        Z(ed-        Z)ed.        Z*d=d/Z+d0 Z,d@dGd1Z-d2 Z.d3 Z/dFd4Z0dHd5Z1d=d6Z2d=d7Z3d8 Z4d9 Z5	 dF	 	 	 dId:Z6dHd;Z7e7Z8 xZ9S )Jr  r   ztuple[Any, ...]constant_argsr  zdict[str, Any]r   NzOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_namezIterable[str]ordered_kwargs_for_cpp_kernelzFOptional[Union[torch._ops.OpOverload, torch._ops.HigherOrderOperator]]op_overloadzOptional[list[dict[str, Any]]]arg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                6   t         |   |||       || _        |r|ni | _        || _        |
| _        | j                  |       | j                  |       |	| _        | j                          i | _
        g | _        t        j                  j                  | _        y Nr!  )ri  r  r7  r   r8  r<  set_cpp_kernel_nameset_python_kernel_namer;  collect_arg_kwarg_propertiesr?  r@  r`   r   r  fx_node)rU  r   r*  rC  r7  r   r8  r9  r:  r;  r<  rk  s              rv   r  zExternKernel.__init__   s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++rx   c                     | g| j                   S rq   )r@  r[  s    rv   rJ  zExternKernel.get_outputs  s    -t,,--rx   c                    t               S rq   r/   r[  s    rv   rL  z%ExternKernel.get_unbacked_symbol_defs!  rM  rx   c                N   t        | j                  t        j                  j                        r\| j                  j
                  j                  D cg c]2  }|j                  s$|j                  |j                  |j                  d4 c}n+t        t        | j                              D cg c]  }i  c}| _        t        | j                  t        j                  j                        rP| j                  j
                  j                  D ci c]&  }|j                  |j                  |j                  d( c}ni | _        t        | j                  t        j                  j                        r| j                   sJ| j                  j
                  j                  D cg c]  }|j                  s|j                   c}| _        | j                  j
                  j                  D cg c]  }|j                  s| c}| _        y g | _        y c c}w c c}w c c}w c c}w c c}w )N)r   r   r  )r   r  )rr   r<  r   _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   rC  r=  allarg_propertiesr;  schema_kwargs)rU  ru   r   s      rv   rE  z)ExternKernel.collect_arg_kwarg_properties$  s    $**EJJ,A,AB ))11;; || FFKK%&__ $C$456"6 	$ $**EJJ,A,AB ))11;; qOO
  	 d&&

(=(=>55$($4$4$<$<$F$F6 !,,AFF62  ++33==""D "$D? 76"s*   7H+	H+HH/H'H"9H"c                z    t        | j                  t              r!| j                          | j	                          y y rq   )rr   r*  r   apply_constraintr  r[  s    rv   r1  zExternKernel.decide_layoutI  s-    dkk>2!!#  3rx   c                J    t        | |      \  }}|r|j                  |       y y rq   )rS   make_comment)rU  wrapper
origin_str_detailed_origin_strs       rv   codegen_commentzExternKernel.codegen_commentN  s*    +>tW+M(
(  , rx   c                    t         rq   r9  rU  rV  s     rv   codegenzExternKernel.codegenS  r:  rx   c                   || _         t        j                  j                  r.t	        | j
                  t        j                  j                        sy | j
                  }| j                   |j                  dk(  rU|j                  dk(  r|j                  j                  d      d   n|j                  j                  dd      }d| d| _         y |j                  j                  | _         y y )Natenr  .r   r   z
at::_ops::z::call)r:  r`   r   cpp_wrapperrr   r<  r   rJ  rK  	namespace_overloadnamer   rT  replacerL  r   )rU  r:  r+  opnames       rv   rC  z ExternKernel.set_cpp_kernel_nameV  s    .ww""*ejj33+
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (rx   c                   || _         |y | j                  }|y t        |t        j                  j
                        rd|j                   | _         y |j                  j                  dd       d|j                   | _         y )Nztorch.ops.higher_order.._ops..ops.r_  )	r9  r<  rr   r   rJ  HigherOrderOperatorr   r   rc  )rU  r9  r+  s      rv   rD  z#ExternKernel.set_python_kernel_namen  s    "4)!!>

 > >?(??P&QD# $$,,Xw?@&//ARS #rx   c                &   | j                         x}r|j                  nt        j                  j                  }t        j                  j
                  r4t        j                  j                  j                  | j                  |      S | j                  S rq   )
r   r   r`   r   device_typer`  r  get_c_shim_func_namer:  r9  )rU  dr   s      rv   get_kernel_namezExternKernel.get_kernel_name}  sn    !%!22A29L9L ww"" GG  55d6J6JFS	
 ((	
rx   c           	        t         j                  | j                         | j                         | j	                         | j                         | j                         | j                               }|j                          |S )N)r   r   rW  rX  rO  rM  )	r  ry  r   r   r  r   rl  ri  r  )ru   r4  s     rv   
copy_inputzExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	rx   c                H	   ||d}t        j                  |      \  }g g }g }|D ]  }j                  t        |t              xr t        |t
                      d   r|j                  |       Lt        |t        j                        r5t        j                  j                  j                  j                  |d       }|j                  |        fd}	|D 
cg c]  }
| j                  |
       }}
|D ]  }
t        |
      st        |
d        g }|D ]  }
t        |
t               se|
j#                         t        j                  j$                  v r;|j                  t        j                  j$                  |
j#                                   yt        |
t               se|
j#                         t        j                  j&                  v r;|j                  t        j                  j&                  |
j#                                   t        |
t(              r!|j                  |
j+                                t        |
t,        j.                  j0                  j
                        ro|
j2                  j4                  }|
j2                  j6                  dk(  r|J |j                  t,        j8                  j:                  |   j=                                |j                  t?        |
d               |	||      \  }} ||i |}d }t        j@                  j                  x}rt        jB                  jD                  jG                  d	      }tI               }t        jB                  jJ                  t,        jL                  jN                  jP                  k(  r|d
   }tS        t        jB                        }|5  tU        |t        jB                  |       d d d        tW        |||      }t        |tX        tZ        f      s|gn|}|D ]~  }t        |t,        j\                        s|j^                  s+d}t        j                  jB                  jD                  jG                  dd       x}r| d| }|t        j                  _0         ||||	|fS c c}
w # 1 sw Y   xY w)N)r   r   r  )r  c                $   g }t        |       }t        |      }D ]9  }|r|j                  t        |              |j                  t        |             ; t        j                  |      }|j                  dg       |j                  di       fS )Nr   r   )iterr/  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr|  	args_specis_arg_tensors	          rv   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args  s    Fo.J!"56N* 8	MM$z"23MM$~"67	8
 %%fi8A55$aeeHb&999rx   TrQ  r  )r   ru  r5   zEsparsity not handled. Please file issue for sparse inference weights.stack_tracez Found from : 
 )1rt  tree_flattenr/  rr   rn   GeneratorStater   r   r`   r   r   r   create_symintnoder  r   r+  rj  r  	constantstorchbind_constantsr  	get_valuer   r  irr   r   r   r  default_generatorsclone_stater   rl  r  r<  r  r   r  _higher_order_opseffectswith_effectsr%   r*   r&   r   r   Tensor	is_sparsedisable_cudagraphs_reason)rz  r+  r   r   binded_args	args_flattensor_argsnon_tensor_argsr   r}  ru   example_argsdevice_indexnew_args
new_kwargsexample_outputr?  r   node_meta_valctxexample_out_lir   msgr~  r{  r|  s                           @@rv   process_kernelzExternKernel.process_kernel  s     $v6%22;?	9%' 		,C  3'O
30O,O R ""3'c5::.''**44FFsQUFVC&&s+		,
	: 6AAs((+AA  	6A$Q'%a5	6 	 	  	LA a*qzz|qww?P?P/P##AGG$5$5ajjl$CDq(+JJLAGG$?$??##AGG$?$?

$MNA/##AKKM2Au11@@A xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK'	L*  .lOL*8Z8JN---9-NN//33E:M-C~~$$(?(?(G(G(T(TT -a 0<Q^^L K	1>>>JK 9>=! ntUm<  	
   	8A!U\\*q{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471	8 
 	
O BjK Ks    R=RR!c           
        t        |t              sJ t        |t              r|S |j                         }t        j
                  j                  |j                               }|J |j                         }|d|j                  v rt        |j                  t              r|j                  d   j                  t        j                        s-|j                  d   j                  t        j                        r)|j!                  t#        |j%                                      n|j'                          t)        j*                  |j%                         d      \  }}|d   } |j-                         |      }t        j
                  j.                  j1                  ||      }t        j
                  j.                  j3                  ||      }	t        j
                  j.                  j5                  ||      }
t7        ||	      |
z   }||k7  rt8        j;                  d|	|
|       t<        t        |j>                  tA        |jC                         |jE                         |j%                         |	|
            S )	z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        ru  r  r|  rk   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sr  r(  )#rr   rj  r.  r  r`   r   r  r  rl  r<  r*  r   rW  r   r  r  r  r"   r   r  r7   rG  r  r   rH  stride_vars
offset_varrX   rE  rF  r  r)  r,  r  r   )rz  ru   x_unwrap_viewr6  x_unwrap_view_fx_node
index_argsrY  r,  r   r0  r-  expecteds               rv   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view
  s    !X&&&a)H gg  !7!7!9: # 3 3 5 "-.333=//@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

J  ]
  ,  55eZH''""..ujA!!,,UJ?Z1F:HIIR	 &%,,.kkmZZ\	
 		
rx   c                   |
t               S t        |t        j                  t        j                  j
                  j                  t        f      rt        |      S t        |t              r[t        j                  j                  t        j                  |j                  |j!                         |j#                                     S t        |t$              r|S t        |t&              r| j)                  |j*                        S t        |t,              r4t-        | j)                  |j*                        |j/                               S t        |t0              r;|j3                          t5        |j7                               r	 | j9                  |      S t        |t<              r|j3                          |S t        |t>        t        f      r|S | jA                  |      S # t:        $ r Y Vw xY w)N)r  rk  r(  )!r0  rr   r   r   r   r   r   rs   r   rH  r`   r   add_tensor_constantr   r/  r   r   r   r  rm   r  r)  r.  r   rj  r  r   r  r  r  r_  NonTensorObjro  r  s     rv   r  zExternKernel.realize_inputO  sm   9'))a%**ekk&9&9&A&A3GH(a00a"77..QWWAKKM!,,.Q  a(Ha#$$QVV,,a)"&&qvv.q||~  a"IIK$Q]]_5::1== a$IIKHa,(=>?H~~a   + s   G 	G*)G*c                    t        |      r<t        |j                               dk(  r|S |j                         D ]  }|dk(  s	|c S  | j                  |      S r  )r   r   r  ro  )rz  ru   r   s      rv   require_stride1zExternKernel.require_stride1p  sT     #1<<>"a',,. Q;H ~~a  rx   c                	   ||J |j                         dv r|s|S t        |      rt        |j                         t              r}|rht        |ddt        ||      rJt        t        j                  j                  j                  |j                         j                              n||       |S t        |ddd ||       |S t        |j                         t        t        f      rf|r|j                         j                  |      s5|rCt!        ||j                         j                  |j#                               r|t%        ||      S |S t        |j                         t&              rt        |j                         j)                         t              rt+        d      t        |j                         j)                         t              rt|r-|j                         j)                         j                  |      sC|rCt!        ||j                         j)                         j                  |j#                               r|S t        |t,              rX|r|j                         j                  |      s5|r5t!        ||j                         j                  |j#                               r|S t        |t.              rt        |j0                  t2              rt        |j0                  t4              st        |j7                               rvt        |j7                         j0                  t8              sN	 | j;                  |j0                        |_        |r| j=                  |||      S |r| j?                  |||      S 	 d }|j#                         }|t        j                  j                  }tC        tE        |j#                                     D cg c]<  }|jG                  ||   d      r%|jI                  |j#                         |   d	      r|> }}|D ].  }	tJ        jL                  jN                  jQ                  ||	dd
      }0 | jS                  |      }t        |dd|||       |rt        ||      sJ |S |r<||J tJ        jL                  jN                  jU                  ||      }t%        ||      S |S # t@        $ r Y Hw xY wc c}w )N)r   r5   TF)rR  r\  r]  r  r[  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr^  r   r   r5   )+r  r   rr   r   r   r+  rd  r   r`   r   r   
size_hintsr   r,  r  rc  r"  r   r5  r  r  r  r%  rm   r)  rj  r.  r  r2  r  require_stride_orderrequire_exact_stridesr  r   r   r  r*  r   r  loweringslice_ro  ri  )
rz  ru   r   r  r  expanded_dims	orig_sizer   r   r  s
             rv   require_strideszExternKernel.require_stridesz  s>     M$===;;=F"=H !#!,,..9 *#(-
  B!UK - ! 0 0 ; ;ALLN<Q<Q R "'&3 H *#(-%)&3&3 HALLN[/,JK1<<>;;EB!1%q||~'<'<ajjl %0 4A}E 
 ALLN,FGalln88:NK(b    : : <kJq||~99;MMeT%5)LLN668??JJL H a%q||~77>-!1<<>#8#8!**,
 Hq)$1668,qvv7%ammo6q}}335FG88@335 4   #44= 5   # .2JJL	$ww''H s1::<0133M!4DaH11!**,q/1E M  % BOO,,33AsAqAB
 NN1!''	
 5a???  (]-FFF((//9=A21mDDW ' s   15R: 'R: AS
:	SSc                *    | j                  |||      S )N)r  r  r  )rz  ru   r  r  s       rv   r  z"ExternKernel.require_exact_strides  s!    ""]- # 
 	
rx   c                *    | j                  |||      S )N)r   r  r  )rz  ru   r   r  s       rv   r  z!ExternKernel.require_stride_order  s    ""1E"OOrx   c                .    | j                  |t              S rq   )r  r  r  s     rv   require_channels_lastz"ExternKernel.require_channels_last!  s    ''+<==rx   c                .    | j                  |t              S rq   )r  r  r  s     rv   require_channels_last_3dz%ExternKernel.require_channels_last_3d%  s    ''+=>>rx   c                    d } ||      r|S | j                  |t        j                  |j                                     S )Nc                    d } ||       t         j                  j                  v xr- t         j                  j                   ||          j                  S )Nc                N    	 | j                         S # t        t        f$ r Y y w xY wrq   )r  AttributeErrorr  rt   s    rv   safe_get_namezPExternKernel.require_contiguous.<locals>.is_mkldnn_tensor.<locals>.safe_get_name,  s+     ::<'&(;<   s    $$)r`   r   r  	is_mkldnn)ru   r  s     rv   is_mkldnn_tensorz9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor+  sH      a AGG$5$55 BGG%%mA&67AArx   r  r   r   r   )rz  ru   r  s      rv   r  zExternKernel.require_contiguous)  s?    
	 AH,,>44QZZ\B rx   c                h    | j                  |t        j                  |j                                     S rq   r  r  s     rv   require_contiguous_stridesz'ExternKernel.require_contiguous_strides?  s-     ((~00>
 	
rx   c                     y rq   r   r[  s    rv   rS  zExternKernel.apply_constraintG  r  rx   c                   t        |t        t        f      sJ t        |t              rt        |      }| j                  sJ d       t	        |      }t	        | j                        }||k  rqt
        j                  d| j                  ||z
         t        ||      D ]>  }| j                  |   d   }|j                  ||v r||   n| j                  |   d          @ |S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )
rr   r   r   r=  r   rE  rF  r<  r   r/  )rU  r   r   n_args
n_pos_argsr   arg_names          rv   fill_non_provided_argsz#ExternKernel.fill_non_provided_argsJ  s     $u...dE":D""U$UU"T,,-
 JII^  V#	 6:. ..q1&96) 8$,,Q/@ rx   c                8   t         j                  j                  rCg }d }|r]| j                  rQt	        | j
                        t	        |      k(  sJ d       | j                  D ci c]  }|j                  d      | }}t        | j
                        D ]  \  }}|*|j                  ||         }|r|j                  d      nd }n\t	        | j                        |z   }	| j                  r6|	t	        | j                        k  r| j                  |	   j                  d      nd }|j                  t         j                  j                  j                  ||              |S t        t         j                  j                  j                  | j
                        S c c}w )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )r`   r   r`  r=  r   r7  r  r   rC  r/  r  val_to_arg_strry  )
rU  rf  r  name_to_arg_propertiesr   r   ru   proptype_r   s
             rv   codegen_const_argszExternKernel.codegen_const_argsl  ss   77F
 &*",,4--.#e*< Z< 594G4G*-0CGGFOS(*& * "$"4"45 M1)5155eAh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!UKLM Mqww++::D<N<NOO%*s   $Fc                   t         j                  j                  rC| j                  7| j	                  g | j
                  | j                  | j                        }d}n| j
                  }d}g }t        |      D ]  \  }}t         j                  j                  r| j                  r|t        | j                        k  sJ d       | j                  |   j                  d      }|j                  t         j                  j                  j                  ||             |j                  t         j                  j                  j                  |              |r|j                  | j!                                |S )NFTz-Invalid access to ExternKernel.arg_propertiesr   )r`   r   r`  r<  r  rC  r7  r   r   r=  r   r  r/  r  r  rp  r  )rU  rC  need_codegen_constant_argsr   r   ru   r  s          rv   codegen_argszExternKernel.codegen_args  s5   774#3#3#?003$++3 2 23T[[F */&[[F)-&f% 	DDAqww""**q3t7J7J3K/K CK ++A.226:AGG00??5IJAGG00??BC	D &KK//12rx   c                "   ||v r|j                  |      S || j                  v r| j                  j                  |      S | j                  r8|| j                  v r*| j                  j                  |      j                  d      S t        | d      )zGiven an argument name, queries for values in (in order):
        1. any provided kwargs for this function.
        2. the class self.kwargs member.
        3. any available default arguments in self.allarg_properties.r  z not in self.allarg_properties)r  r   rP  r  )rU  r  r   s      rv   get_kwargs_valuezExternKernel.get_kwargs_value  s    
 v::h''t{{";;??8,,!!h$2H2H&H))--h7;;OLLz)GHIIrx   c           	        t         j                  j                  r| j                  t	        | j
                        dk(  rg S g }| j                  D ]  }|r|dk(  r| j                  |      }t        |t        j                        r|j                  |       H| j                  r8|| j                  v r*| j                  j                  |      j                  d      nd }|j                  t         j                  j                  j                  ||              |S | j                   j#                         D cg c]3  \  }}| dt         j                  j                  j                  |       5 }}}|S c c}}w )Nr   r   r   rc  )r`   r   r`  r<  r   rQ  r;  r  rr   r   r   r/  rP  r  r  r  r   rX  )rU  skip_outr   r  rS  r  ks          rv   codegen_kwargszExternKernel.codegen_kwargs  sO   77+D4F4F0G10L	F >> QE 1))(3a,MM!$  11h$BXBX6X ..228<@@H! 
 MM!''"6"6"E"Ea"OPQ(  !KK--/Aq #Qqww++::1=>?F  	s   78E4c                    | j                   S| j                   j                  }t        |dd      }|j                  dd      }|j	                  dd      d   }| d| }|S d}|S )	Nr   unknown_namespacerf  rg  r_  r5   r   
unknown_op)rF  r  r   rc  rsplit)rU  r  op_namespaceop_names       rv   get_op_namezExternKernel.get_op_name  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Grx   c                   t         j                  rt        j                  j                  st        | j                               dk(  ry t        j                  j                  j                  | j                               }t        j                  j                  j                  | j                               }| j                         }|j                  d| j                          d| d| d|d	       y y y )Nr   zassert_size_stride(r  r   )r6   size_assertsr`   r   r`  r[   r   r  codegen_shape_tupler  r  r  r  )rU  rV  r   r   r  s        rv   codegen_size_assertsz!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;rx   c           	     H   t         j                  rt        j                  j                  sw| j                         }|t        j                  j                  v}| j                         }|r |j                  d| dt         d|d       y |j                  d| d| d       y y y )Nzassert_alignment(r  r   z	# buffer z (op: z) is assumed to be not aligned)
r6   alignment_assertsr`   r   r`  r  rg  r  r  rT   )rU  rV  r   alignedr  s        rv   codegen_alignment_assertsz&ExternKernel.codegen_alignment_asserts  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#rx   c                N    | j                         }| j                         }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r  )rU  _size_strides      rv   get_group_stridezExternKernel.get_group_stride  s*     //#r{G##rx   c                   t         j                  j                  }| j                         }| j	                         }|D cg c]  }|j                  |       }}t        t        |            D cg c]  }t        d|        }}t        t        t        |            |j                  d      }t        |      D 	ci c]  \  }}	|	|
 }
}}	t        t        |
            D cg c]  }|
|   	 }}|D cg c]  }||   	 }}| j                         } ||      }t         j                  j                  j                  |||g      \  }}}t        d      \  }}t        t!        | ||D cg c]
  } ||       c}                  }t#        t%        j&                  |      |      }|t)        |      fS c c}w c c}w c c}	}w c c}w c c}w c c}w )zC
        Manually get canonicalization of the output index
        rl  T)r(  r  c)r`   r   r   r   r  r   r   r   rY   r.  r  r   r  rc  r@   r   r   r\   r   ri  r   )rU  r   r  r0  ru   r   rR  index_orderr   r   r   r   r  r   	new_sizesr   rh  r   add_varreplacements                       rv   canonicalizezExternKernel.canonicalize  s   
 77##//#29:Q8%%a(::;@U;LMa(1QC1M
MU3w<0g6I6ISWX+4[+ABxsC#s(BB$)#f+$67q77-23jm3
3##%
#%&WW%5%5%E%Ew&
"	7F !%
73z7	3R1GAJ3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F5/F:>F?$G6G
+Gc                    |rt         nt        }t        t        j                            }| j
                  D ]  }| ||      z  } | j                  j                         D ]  }| ||      z  } |S rq   )maybe_free_unbacked_symbolsmaybe_free_symbolsr0   r   r   r7  r   r   )rU  r   maybe_get_symbolsr|  r   s        rv   r  z!ExternKernel.get_free_symbol_uses  s{     ,9'>P 	 u||$&%% 	(C"3''A	(;;%%' 	(C"3''A	(rx   c           
     "   t        | dd       }d|g}|t        j                  |       D cg c]'  }|j                   dt        | |j                         ) c}z  }|j	                  d| j
                         | j                  |      S c c}w )Nr9  zpython_kernel_name=rc  rd  )r   r-  fieldsr   r/  rO  r  )rU  kernel_namer|  r.  s       rv   ro  zExternKernel.__str__,  s    d$8$?!+1
 	$++D1
 zzl!GD%**567
 	
 	|D$4$4#789u%%
s   ,Br   NNNNr   Nr  rQ  rS  rq   r:  r  r   r   )r9  r  r   r   )r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])NNF)r   zOptional[Sequence[int]]r  r	  r  )rf  rL  r  r(  ):r   r   r   r7  r   r-  r.  r   r   r8  r9  r:  r   r;  r<  r=  r>  r?  r@  r  rJ  rL  rE  r1  rY  r\  rC  rD  rm  r/  ro  r  r  r  r  r  r  r  r  r  r  r  r  rS  r  r  r  r  r  r  r  r  r  r  r  ro  r  r  r  s   @rv   r  r    s   %'M?'.[..tDFND-1K*1(,,%)O]) 4E;3D3D4!= 
 	    6:N29<@9@<MK<M<M=9  .?[->->t-T*T &(, 
,<.#$J!
-
";0
 
 
 u

u
 u
n B
 B
H ! !@ ! !  *.6:Z 'Z 4	Z Zx 
 

 P P > > ? ?  * 
 
 DP@4J:	
$'@ %*!	!
& Hrx   r  c                  B     e Zd ZddZ	 	 	 	 	 	 	 d	 d fdZddZ xZS )ExternKernelOutc                &    |j                  |        y rq   )generate_extern_kernel_outr[  s     rv   r\  zExternKernelOut.codegen=  s    **40rx   c
                    t         
|   d || j                  |      ||xs i d ||||	
       t        j                  j                  |       | _        t        j                  j                  |        y rq   )ri  r  r  r`   r   r  r   r  )rU  r*  rC  r7  r   r8  r9  r:  r;  r<  rk  s             rv   r  zExternKernelOut.__init__@  si     	'Lb)	
 GG++D1		""4(rx   c                     yr  r   r[  s    rv   r  zExternKernelOut.should_allocate[  rp  rx   r  r  r  )r   r   r   r\  r  r  r  r  s   @rv   r  r  ;  s3    1 &() 
)6rx   r  c                        e Zd Zd fdZ xZS )RandomSeedsc                   t        j                  t         j                        }t        |   t        |t         j                  |g      g |j                  |j                  |ggddt        j                  j                         y )Nr#  zaten.randint.low_outzat::_ops::randint_low_out::call)r*  rC  r7  r9  r:  r<  )r   r  rk  ri  r  r,  r  r  r^  randintlow_out)rU  countr   limitsrk  s       rv   r  zRandomSeeds.__init__`  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
rx   )r  rs   r   r  r   r   r   r   r   r  r  r  s   @rv   r  r  _  s    
 
rx   r  c                  F     e Zd ZddZ	 	 	 	 	 	 d	 d fdZddZd Z xZS )r2  c                &    |j                  |        y rq   )generate_extern_kernel_allocr[  s     rv   r\  zExternKernelAlloc.codegent  s    ,,T2rx   c	                    t         	|   d || j                  |      ||xs i d ||||
       g | _        t        j
                  j                  |       | _        t        j
                  j                  |        y rq   )	ri  r  r  r  r`   r   r  r   r  )
rU  r*  rC  r7  r   r9  r:  r;  r<  rk  s
            rv   r  zExternKernelAlloc.__init__w  sp     	'Lb)	
 ')GG++D1		""4(rx   c                     yr  r   r[  s    rv   r  z!ExternKernelAlloc.should_allocate  r  rx   c                    t         rq   r9  r[  s    rv   rS  z"ExternKernelAlloc.apply_constraint  r:  rx   r  )r   NNNr   Nr  )r   r   r   r\  r  r  rS  r  r  s   @rv   r2  r2  s  s5    3 &() 
)<"rx   r2  c                  <     e Zd ZdZd fdZddZddZd	dZ xZS )
r  zP
    An output buffer that represents the mutation of a pre-existing buffer
    c                    t         |   d |       |j                         }t        j                  j                  |       |g| _        || _        t        j                  j                  |       | _	        y r.  )
ri  r  r  r`   r   r  mutation_namesmutating_noder  r   )rU  r*  mutated_noder  mutated_node_namerk  s        rv   r  zMutationOutput.__init__  s`    d62(113	##$5601(5GG++D1	rx   c                    | j                   S rq   )r  r[  s    rv   ro  zMutationOutput.get_defining_op  s    !!!rx   c                    | j                   S rq   )r  r[  s    rv   r  z!MutationOutput.get_mutation_names  r  rx   c                     yr  r   r[  s    rv   r  zMutationOutput.should_allocate  r  rx   )r  r4  r   r   r#  r,  r  )	r   r   r   r  r  ro  r  r  r  r  s   @rv   r  r    s    2"#rx   r  c                       e Zd ZU dZi Zded<   e	 	 	 	 	 	 d	d       Ze	 	 	 	 	 	 d	d       Zd
 fdZ	ddZ
ddZ xZS )TMADescriptorad  
    An IR node representing a generic host-side TMA descriptor in the Triton API
    Mostly useful for user-defined Triton kernels relying on host-side TMA;
    but can, in principle, be used for Inductor's Triton templates, too.

    See TMADescriptorExperimental and TMADescriptorStable for the two implementations
    (the old API and the new API)
    zdict[Any, TMADescriptor]_CACHEc                    t        |      dk(  sJ |d   dk(  rt        |g|d    S |d   dk(  sJ t        |g|d    S )Nr   r   experimentalr5   r8  )r   TMADescriptorExperimentalTMADescriptorStable)rz  r/  tma_metas      rv   _create_implzTMADescriptor._create_impl  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<rx   c                    t        |      |f}|| j                  vr| j                  ||      | j                  |<   | j                  |   S rq   )idr  r   )rz  r/  r  r(  s       rv   ry  zTMADescriptor.create  sF     &z8$cjj !..vx@CJJsOzz#rx   c           
        t         |   d t        t        ||j	                                     |t        |      d        || _        t        j                  j                  |       | _
        t        j                  j                  |        y )Nr(  )ri  r  r  r.  r   r   r/  r`   r   r  r   r  )rU  r/  rC  r7  rk  s       rv   r  zTMADescriptor.__init__  su     !,,. - 	
  GG++D1		""4(rx   c                &    |j                  |        y rq   )generate_tma_descriptorr[  s     rv   r\  zTMADescriptor.codegen      ''-rx   c                    | j                   S rq   )r/  r[  s    rv   
get_tensorzTMADescriptor.get_tensor  rr  rx   )r/  rn   r  ztuple[str, tuple[Any, ...]]r   r  )r/  rn   r  r  )r   r   r   r  r  r   r  r   ry  r  r\  r(  r  r  s   @rv   r  r    sz     (*F$)=='B=	= = 'B	 )*.rx   r  c                  :     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 d fdZ xZS )r  z
    the new host-side TMA Descriptor API:
    (the ones obtained via create_{1d,2d}_tma_descriptor calls).

    See also TMADescriptorStable for the new API.
    c                b   t        |      dv sJ t        |      t        |      k(  sJ ||j                         j                  }|| _        || _        || _        t        | j                        | _        |g}g | j                  | j                  | j
                  }t        | !  |||       y )N)r5   r   r/  rC  r7  )	r   r   r[  r  
block_dimselement_sizer  ri  r  )rU  r/  r  r,  r-  rC  r7  rk  s          rv   r  z"TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
rx   rq   )
r/  rn   r  list[Union[int, torch.SymInt]]r,  r.  r-  r  r   r   r   r   r   r  r  r  r  s   @rv   r  r    sG     '+

 -
 3	

 $
 

 
rx   r  c                  $     e Zd ZdZd fdZ xZS )r  z
    the new host-side TMA descriptor API
    (the ones obtained via TensorDescriptor.from_tensor).

    See also TMADescriptorExperimental for the old API.
    c                :    || _         t        | 	  ||g|       y )Nr+  )block_shaperi  r  )rU  r/  r2  rk  s      rv   r  zTMADescriptorStable.__init__!  s(    &8% 	 	
rx   )r/  rn   r2  r.  r/  r  s   @rv   r  r    s    
 
rx   r  c                  <     e Zd Z	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )SubgraphBufferc                `   t         
|   d ||       || _        || _        t        j
                  j                  |       | _        t        j
                  j                  |        t        j
                  j                  | j                  ||      | _
        t        | j                        }|D ]T  }|| j                  j                  |j                  <   | j                  j                  j                  |j                         V |D cg c]  }|j                   c}| _        dd lmc m}	 t	        j(                  | j                        5  |	j+                  ddd      5   | j                  j,                  | j                    d d d        d d d        y c c}w # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)ri  r  r>  example_inputsr`   r   r  r   r  make_subgraphsubgraphrF  rC  r  graph_input_namesr/  
sym_inputstorch._inductor.configr  r6   set_graph_handlerr   run)rU  r*  r~   r>  r:  subgraph_namer>  sym_inpsym_varinductor_configrk  s             rv   r  zSubgraphBuffer.__init__,  s\    	v{3,GG++D1		""4(--dgg~}U(5
! 	AG7>DMM&&w||4MM++227<<@	A 8BBG7<<B88  / 	8 &&""'+1 '  8
 "!!4#6#678	8 	8	 C8 8	8 	8s*   F	F$$FF$F!	F$$F-c                     G d d      }| j                   D cg c]  }|j                          }}|j                   || j                        g | j                  || j
                  g       y c c}w )Nc                      e Zd ZddZy),SubgraphBuffer.codegen.<locals>.CodegenGraphc                4    || _         |j                  | _        y rq   )r   r   )rU  r   s     rv   r  z5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__Q  s    "
!JJ	rx   N)r   rd   )r   r   r   r  r   rx   rv   CodegenGraphrH  P  s    'rx   rJ  )rC  r  'codegen_subgraph_with_flattened_outputsr<  r>  r   )rU  rV  rJ  r   outer_inputss        rv   r\  zSubgraphBuffer.codegenO  sh    	' 	'
 8<{{C!++-CC77'-doo--YYK	
 Ds   A/)
r*  r  r~   rR  r>  torch.fx.GraphModuler:  	list[Any]rB  r   r  )r   r   r   r  r\  r  r  s   @rv   r4  r4  +  s>    !8!8 "!8 !	!8
 "!8 !8F
rx   r4  c                  `     e Zd Zd ZddZ	 d		 	 	 d
 fdZddZ	 	 d fdZddZddZ	 xZ
S )UserDefinedTritonKernelc                D   ddl m} ddlm} |j	                  | j
                        g }g }g }t        |      rt        d      r%|j                  fdj                  D               n)t        d      sJ |j                  j                         t        d      r:j                  D ]*  }|j                  j                  j                  |          , n)t        d      sJ |j                  j                         j                   }j                  |||fS )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  P   K   | ]  }j                   j                  |     y wrq   )r   	arg_names)r  r   r+  s     rv   r  zBUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>k  s%      */0FII''**s   #&restore_value	reset_idxreset_to_zero)triton.runtime.autotunerrR  *torch._higher_order_ops.triton_kernel_wraprS  
get_kernel
kernel_idxrr   r=  rp  rT  rW  rX  r/  r   rV  rY  configs)rU  rR  rS  r^  restore_value_argsreset_to_zero_argsr   r+  s          @rv   get_kernel_and_metadataz/UserDefinedTritonKernel.get_kernel_and_metadata^  s   6P"--doo>(*(*fi( v}-")) *4:4F4F*  v777"))&*>*>?v{+)) FA&--fii.A.A!.DEF v777"))&*>*>?nnGYYFw 24FFFrx   c                X   ddl m} | j                         \  }}}}|j                  ||| j                  ||| j
                        \  }}}	| j                  D 
ci c]  }
|
| j                  |
       }}
t        |j                  D cg c]  }|j                  |    c}      }g }g }g }g }t        j                  |j                         t        t        j                  d      |	            D ]d  \  }}|j!                  |       |j!                  |       t#        |t$              r?|j!                  |j'                                |j!                  |j)                                xt#        |t*        t,        t.        t0        j2                  f      r,|j!                  |       |j!                  t5        |             ||v r'|j!                  d       |j!                  t*               |R	  |       r(|j!                  d       |j!                  t*               +|j7                          |j7                          Mt9        dt5        |       d|        | j;                  |       |j=                  ||||||d| j?                         | j@                  jB                  	       y c c}
w c c}w )	Nr   )triton_version_uses_attrs_dictrr  r  zUnsupported arg type: r   T)	arg_typesraw_argsraw_keystriton_metar  r   original_fxnode_name)"torch._inductor.utilsrc  ra  !define_user_defined_triton_kernelr   gridr;  r  r0   
constexprsrV  rd  r  rX  r   repeatr/  rr   rn   r  r   rs   r  r   r   r   r   rx  r  rY  generate_kernel_callr   rF  r   )rU  rV  rc  r+  r^  r_  r`  new_namerg  extra_launch_argsr  
named_argsr   constexpr_namesr   rd  raw_keys_filteredraw_args_filteredr   r   s                       rv   r\  zUserDefinedTritonKernel.codegen}  sr   H ((*	
 55KKII
		
 261S1S
,-At$$Q''

 
 %6CTCT%Uaf&6&6q&9%UV!	')')"I$4$4R$8:K L
 !	WID# $$T*$$S)#v&C1134  1C#udEJJ!?@C   c+( B  % 23KKO$$S)%))+%))+),B49+RPSu*UVVC!	WF 	W%$$&&#??$!%!2!2 	% 
	
[
 &Vs   J"J'c                P    t         |   |      t        | j                  |      z  S rq   )ri  r  r   rk  r	  s     rv   r  z,UserDefinedTritonKernel.get_free_symbol_uses  s-    
 w+M:=MII}>
 
 	
rx   c                    t               S rq   r/   r[  s    rv   rL  z0UserDefinedTritonKernel.get_unbacked_symbol_defs  rM  rx   c                  g }i }g }|j                         D ]  \  }}	t        |	t              rXt        j	                  | j                  |	            }
||v rt        j                  |
||         }
|j                  |
       |
||<   n|j                  |	       |	||<    t        |      dk7  sJ |d   j                         | _        t        | 5  d t        | j                        |t        |      |       || _        || _        | j%                         \  }}}}|j&                  D cg c]	  }||v s| c}| _        ddlm} t        |      dkD  r|d   j.                  ni } ||i |||      D cg c]  }||   	 c}| _        | j0                  D cg c]#  }t3        t        | j                        ||       % c}| _        t6        j8                  j;                  |        y c c}w c c}w c c}w )Nr   rq  )identify_mutated_tensors)rX  rr   rm   r  r  r  r  ry  r/  r   r   r   ri  r  r  r   r]  rk  ra  rV  r;  r[  rx  r   mutable_argsr  r@  r`   r   r  )rU  r]  rk  tma_descriptor_metadatakernel_argsrC  r   r7  r  rS  r   r+  r^  r   r   rx  autotuned_kwargsr(  r6  rk  s                      rv   r  z UserDefinedTritonKernel.__init__  s    %%' 		DAq!Y' 99$:L:LQ:OP//%,,Q0G0JKAa q	$$Q'q			 6{aQi**,dkk*- 	
 %	 $ < < >A "++.
sk/AC.
* 	X03Gq0@71:,,b 0;;;*:;=T
 
 ((!
 :T[[93E!
 	
""4(%.

!
s   '	G$1G$2G)(G.c                ,    t        | j                        S rq   )r   r@  r[  s    rv   rJ  z#UserDefinedTritonKernel.get_outputs  s    D))**rx   c                    | j                   S rq   rq  r[  s    rv   r   z"UserDefinedTritonKernel.get_device  rr  rx   r  r  r(  rS  rQ  r  )r   r   r   ra  r\  r  rL  r  rJ  r   r  r  s   @rv   rP  rP  ]  sG    G>N
b %*
!
	!
3)	3)j+rx   rP  c                  D     e Zd ZdZddZddZd	dZd
dZd fdZ xZ	S )InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   d | j                   D        \  }t        j                  j                  r\|j	                  | j                          d| ddj                  t        t        | j                               d|j                          y |j	                  | j                          d| ddj                  t        t        | j                               d|j                          y )Nc              3  <   K   | ]  }|j                           y wrq   r  r  r   s     rv   r  z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     ;!##%;rW  rx  r  z, NULL)r   )rC  r`   r   r`  r  rm  r{  ry  reprr7  ending)rU  rV  ru   s      rv   r\  z InplaceBernoulliFallback.codegen  s    ;t{{;77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klrx   c                     yr  r   r[  s    rv   r  z(InplaceBernoulliFallback.should_allocate(  r  rx   c                >    | j                   d   j                         gS r  rC  r  r[  s    rv   r  z+InplaceBernoulliFallback.get_mutation_names+      A'')**rx   c                    t               S rq   r/   r[  s    rv   rL  z1InplaceBernoulliFallback.get_unbacked_symbol_defs.  rM  rx   c                ^   t         |   d t        |j                               | j	                  |g      ||       t
        j                  j                  |j                                t
        j                  j                  |       | _
        t
        j                  j                  |        y )Nrq  r<  )ri  r  r  r   r  r`   r   r  r  r  r   r  )rU  r<  ru   r7  rk  s       rv   r  z!InplaceBernoulliFallback.__init__1  s~    alln-$# 	 	
 	
##AJJL1GG++D1		""4(rx   r  r  r,  rS  
r   r   r   r  r\  r  r  rL  r  r  r  s   @rv   r  r    s&    +
) 
)rx   r  c                  \     e Zd ZdZddZd	dZd
dZddZ	 	 d fdZe	ddd       Z
 xZS )InplaceCopyFallbackr  c                R    | j                         \  }}}|j                  |||       y rq   )r  codegen_device_copy)rU  rV  r  r  non_blockings        rv   r\  zInplaceCopyFallback.codegenD  s)    #'#4#4#6 c<##Cl;rx   c                     yr  r   r[  s    rv   r  z#InplaceCopyFallback.should_allocateH  r  rx   c                >    | j                   d   j                         gS r  r  r[  s    rv   r  z&InplaceCopyFallback.get_mutation_namesK  r  rx   c                    t               S rq   r/   r[  s    rv   rL  z,InplaceCopyFallback.get_unbacked_symbol_defsN  rM  rx   c                   t         |   d |||dd       t        j                  j	                  |d   j                                t        j                  j                  |       | _        t        j                  j                  |        y )Nz
aten.copy_aoti_torch_copy_)r9  r:  r   )	ri  r  r`   r   r  r  r  r   r  )rU  r*  rC  r7  rk  s       rv   r  zInplaceCopyFallback.__init__Q  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(rx   c                    ||fD cg c]  }| j                  |       }}|f}t        t        |j                               ||      }|S c c}w r6  )r  r  r  r   )rz  r  r  r  r   rC  r7  r  s           rv   ry  zInplaceCopyFallback.createc  sV    14c
;1###A&;;%$cnn./

  <s   Ar  r  r,  rS  r  )r  r   )r   r   r   r  r\  r  r  rL  r  r  ry  r  r  s   @rv   r  r  ?  s?    <+)
 
)$  rx   r  c                  8    e Zd ZdZddZd	dZd
dZddZd	dZy)MutatingFirstArgExternKernelr  c                    g d | j                   D        t        t        | j                        }|j	                  | j                          ddj                  |       d|j                          y )Nc              3  <   K   | ]  }|j                           y wrq   r  r  s     rv   r  z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>v  s     9a!!#9rW  rx  r  r   )rC  ry  r  r7  r  rm  r{  r  )rU  rV  argrefss      rv   r\  z$MutatingFirstArgExternKernel.codegent  sl    
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
rx   c                     yr  r   r[  s    rv   r  z,MutatingFirstArgExternKernel.should_allocate}  r  rx   c                >    | j                   d   j                         gS r  r  r[  s    rv   r  z/MutatingFirstArgExternKernel.get_mutation_names  r  rx   c                    t               S rq   r/   r[  s    rv   rL  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  rM  rx   c                     yr  r   r[  s    rv   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects  rp  rx   Nr  r  r,  rS  )	r   r   r   r  r\  r  r  rL  r  r   rx   rv   r  r  o  s     
+rx   r  c                        e Zd Zd fdZ xZS )ResizeStorageBytesc                *   t        |t              sJ d       t        |   d t	        |j                               | j                  |g      |f       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j                  |        d| _        d| _        t        j                  j                   j#                  |j$                  j                                y )NzTODO: dynamic shapesrq  )r7  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)rr   rs   ri  r  r  r   r  r`   r   r  r  r  r   r  r9  r:  never_reuse_buffersr  r)  )rU  variabler  rk  s      rv   r  zResizeStorageBytes.__init__  s    (C(@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG	##''(>(>(@Arx   r  r  r  s   @rv   r  r    s    B Brx   r  c                  (     e Zd Zd fdZddZ xZS )SetSourceTensorKernelc                   |j                          t        | 	  |j                         ||gdt        j
                  j                  j                  j                         t        j                  j                  j                  |j                  j                                t        j                  j                  j                  |j                                t        j                  j                  j                  | j                                |j                         }t!        t#        |      ||       t!        t#        |      ||       g| _        y )Nz!torch.ops.aten.set_.source_Tensor)r9  r<  rq  )r  ri  r  r   r   r^   r^  set_source_Tensorr`   r   r  r  r)  r  r   r  r  r@  )rU  self_tensorstorage_tensorr   rk  s       rv   r  zSetSourceTensorKernel.__init__  s    $$&%%'.)B		++99	 	 	
 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4k4H:V4ndK!
rx   c                v    | j                   d   j                         | j                   d   j                         gS r  r  r[  s    rv   r  z2SetSourceTensorKernel.get_inputs_that_alias_output  s/    A'')4;;q>+B+B+DEErx   r  r,  )r   r   r   r  r  r  r  s   @rv   r  r    s    
"Frx   r  c                  Z     e Zd ZdZd
dZddZddZddZddd	 	 	 	 	 	 	 d fd	Z xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c           
        | j                   d   }t        j                  j                  rddd}||v r||   }| j                  rd | j
                  D        \  }}}n%d | j
                  D        \  }}| j                  d   }|j                  ||| j                  d   ||g| j                  | j                  | j                  || j                                y )	Nrc  r  r  )r  multiplyc              3  <   K   | ]  }|j                           y wrq   r  r  s     rv   r  z*ScatterFallback.codegen.<locals>.<genexpr>  s     Jq224JrW  c              3  <   K   | ]  }|j                           y wrq   r  r  s     rv   r  z*ScatterFallback.codegen.<locals>.<genexpr>  s     EA!--/ErW  r5   r   )r   r`   r   r`  src_is_tensorrC  r7  generate_scatter_fallbackr:  r9  r  )rU  rV  rc  get_operator_enumru   r   r  s          rv   r\  zScatterFallback.codegen  s    X&77(-6 B***62JdkkJOQsEEJQ$$Q'C))""1%uc2  ##!	
rx   c                     yr  r   r[  s    rv   r  zScatterFallback.should_allocate  r  rx   c                >    | j                   d   j                         gS r  r  r[  s    rv   r  z"ScatterFallback.get_mutation_names  r  rx   c                    t               S rq   r/   r[  s    rv   rL  z(ScatterFallback.get_unbacked_symbol_defs  rM  rx   NTrc  include_selfc          
     f   t        |t              | _        | j                  r%|||fD cg c]  }| j                  |       }	}|f}
n$||fD cg c]  }| j                  |       }	}||f}
t        |   d t        |j                               | j                  |	      |
||dt        |      ddg|       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )Nrq  r  rc  r  )r9  r;  r<  )rr   rm   r  r  ri  r  r  r   r  r   r`   r   r  r  r  r   r  )rU  r<  ru   r  r   r  rc  r  r   tensorsr7  rk  s              rv   r  zScatterFallback.__init__  s    (Y7 78%oFt))!,FGF FM78%jAt))!,AGA #JMalln-(|<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D)D.r  r  r,  rS  )r  rs   rc  r  r  r   r   r   r  r  s   @rv   r  r    sV    
.+ !%!!) 	!) !) !) 
!) !)rx   r  c                  D     e Zd ZdZddZddZd	dZd
dZd fdZ xZ	S )IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                   d | j                   D        ^}}}g }t        |      }t        | j                        D ]b  \  }}| j                  |   |j	                  t        |             0|j	                  t        j                  j                  j                         d  |j                  | j                         |||g| j                           y )Nc              3  <   K   | ]  }|j                           y wrq   r  r  s     rv   r  z+IndexPutFallback.codegen.<locals>.<genexpr>  s     &Rq':':'<&RrW  )rC  rr  r   r4  r/  rs  r`   r   r  r4  generate_index_put_fallbackrm  r  )	rU  rV  ru   r   valid_indicesr4  iter_valid_indicesr   r   s	            rv   r\  zIndexPutFallback.codegen  s    &Rdkk&R#F]!-0dll+ 	>DAq||A*t$678qww33<<=		> 	,++  "Aw	
9=9P9P9R	
rx   c                     yr  r   r[  s    rv   r  z IndexPutFallback.should_allocate  r  rx   c                >    | j                   d   j                         gS r  r  r[  s    rv   r  z#IndexPutFallback.get_mutation_names  r  rx   c                    t               S rq   r/   r[  s    rv   rL  z)IndexPutFallback.get_unbacked_symbol_defs  rM  rx   c           	     
   || _         |D cg c]  }||	 }}||g|D cg c]  }| j                  |       }}d}	t        
|   d t	        j                               | j                  |      |fd|	|       t        j                  j                  | j                  d   j                                t        j                  j                  |       | _        t        j                  j                  |        y c c}w c c}w )Naoti_torch_index_put_outrq  zaten.index_put_)r9  r:  r<  r   )r4  r  ri  r  r  r   r  r`   r   r  rC  r  r  r   r  )rU  r<  ru   r4  r   
accumulater   r  r  r:  rk  s             rv   r  zIndexPutFallback.__init__  s    $+=qq}==34f2M}2MNQ4%%a(NN4alln-(M0+# 	 	
 	
##DKKN$;$;$=>GG++D1		""4( >Ns   C;C;D r  r  r,  rS  r  r  s   @rv   r  r    s&    
+) )rx   r  c                  $    e Zd Zed        ZddZy)
DeviceCopyc                   |j                         sKt        d |j                         D              r+t        j                  j
                  s|j                  |      S t        j                  j                  |       t        j                  j                  |j                                t        d       |f}t        t        ||j                         |j                               | j!                  |      g|      S )Nc              3  T   K   | ]   }|t         j                  j                  v  " y wrq   )r`   r   r  r  s     rv   r  z$DeviceCopy.create.<locals>.<genexpr>0  s     GqA***Gs   &(zDeviceCopy in input programr#  )r  r  rf  r6   aot_inductoruse_runtime_constant_foldingr  r`   r   add_device_infor   rO   r  r   r   r   r  )rz  ru   r   r  r7  s        rv   ry  zDeviceCopy.create,  s     GA4D4D4FGG''DD''//	'	/78%kkmZZ\
 q!"
 	
rx   c                   | j                         }t        |      dk(  sJ | j                  r2|j                  |d   | j                  j	                         |d          y |j                  |d   | j	                         |d          y )Nr   r   r5   )r  r   r8  r  r  )rU  rV  r   s      rv   r\  zDeviceCopy.codegenD  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Srx   Nr  )r   r   r   r  ry  r\  r   rx   rv   r  r  +  s    
 
.Trx   r  c                  D     e Zd ZdZddZddZd	 fdZd
dZd	dZ xZ	S )r   z;
    The result of a call to aten._local_scalar_dense.
    c                    t               S rq   r/   r[  s    rv   re  zDynamicScalar.get_readsT  rM  rx   c                     yr  r   r[  s    rv   r  zDynamicScalar.should_allocateW  r  rx   c                    |j                          t        | 	  d t        t	        j
                  d            | j                  |g             || _        || _        y Nr   rq  )	r  ri  r  r  r   r   r  symkeypath)rU  r  r  r)  rk  s       rv   r  zDynamicScalar.__init__Z  sK    *ELL$78$:M:Mtf:U	
 rx   c                .    t        | j                  g      S rq   )r0   r  r[  s    rv   rL  z&DynamicScalar.get_unbacked_symbol_defsb  s    488*%%rx   c                &    |j                  |        y rq   )codegen_dynamic_scalarr[  s     rv   r\  zDynamicScalar.codegene  s    &&t,rx   r%  r  r  rS  )
r   r   r   r  re  r  r  rL  r\  r  r  s   @rv   r   r   O  s!    &-rx   r   c                  N     e Zd ZdZddZd	dZd
 fdZd	dZdddZd
dZ	 xZ
S )r   z5
    The result of a call to aten._assert_scalar
    c                    t               S rq   r/   r[  s    rv   re  zAssertScalar.get_readsn  rM  rx   c                     yr  r   r[  s    rv   r  zAssertScalar.should_allocateq  r  rx   c                ~    t         |   d t        t        j                  d            g        || _        || _        y r  )ri  r  r  r   r   scalarr  )rU  r  r  rk  s      rv   r  zAssertScalar.__init__t  s7    ell512	
 rx   c                     yr  r   r[  s    rv   r  zAssertScalar.has_side_effects  rp  rx   c                .    t        | j                  |      S rq   )r   r  r  s     rv   r  z!AssertScalar.get_free_symbol_uses  s    ];;rx   c           	        t         j                  sy t        t        | j	                  d                  }t
        j                  j                  rad| d}t
        j                  j                  j                  | j                  d      }|j                  d| d| j                   d| d	       y t
        j                  j                  j                  | j                  d      }|j                  d
| d       |j                  dt        | j                         d       |j                  | j                          d       y )NFrB  zstd::to_string(r   )r  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)r6   scalar_assertsrs  rr  r  r`   r   r`  r  codegen_cpp_sizevarr  r  r  codegen_python_sizevarr  r  )rU  rV  symbol
symbol_strsizevars        rv   r\  zAssertScalar.codegen  s$   $$ d44454IJK77*6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:rx   r%  r  r  r  )r   r   )r   r   r   r  re  r  r  r  r  r\  r  r  s   @rv   r   r   i  s&    	<;rx   r   c                  "    e Zd ZU ded<   ded<   y)ExternKernelNoder   r   zexport_schema.Noder   Nr   r   rx   rv   r  r    s    
I
rx   r  c                       e Zd ZdZ	 ddd	 d fdZd fdZddZddZd Ze	d	        Z
d
 Zd ZddZd ZddZe	dd       Zed        Z fdZ xZS )FallbackKernelz
    A class that represents a fallback kernel for handling operators that are not
    directly support by inductor. It currently supports functional ops, view ops,
    inplace aten ops, and mutating ops that are auto-functionalizable.
    Nr?  c               \    t            |t        |      t        |      |       d _        | _        t        |t        j                  j                  t        j                  j                  f      sJ d| dt        |       d       | _        | _        |i n| _        t        j                  j!                   j"                         g  _        g  _        t         j                  t        j                  j                        ry d j                  j)                         v ry  j                  j*                  }t        j,                  j.                  j1                   j                        r- j&                  j3                  |d   j5                                y |j6                  rt9        |      st;        d|        j                   j<                   j>                        \  }	}d
 fd	}
t        j,                  j.                  jA                  ||	|      D ]  \  }} |
||        y )Nr  Fz#Fails to create FallbackKernel for r   z not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                    t         j                  t        j                        rt        |t        t
        f      sJ t        j                   j                        rt        |t
        t        f      rJ |y  j                  y d fd}t        j                   j                        r||D ]
  } ||        y y t        j                   j                        sJ  ||       y )Nc                   j                   j                  | j                                j                  j                  r?j
                  j                  t        t        | j                               |              y y r6  )	alias_namesr/  r  
alias_infois_writer@  r  r  r   )r   inforU  s    rv   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias	  sZ      ''

5??++))00&z'H!TR ,rx   r  )
rr   r   r   ListTyper   r   library_utilsis_tensor_like_typer  is_tensorlist_like_type)r  r   r  optional_tensor_argrU  s   `   rv   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s    $))U^^4!#e}55500; &cE4=999{& 44TYY??/2 7+!"567 # %88CCC#rx   r  )!ri  r  r   use_runtime_dispatchr?  rr   r   rJ  rK  rh  r   r<  r}  r   r`   r   warn_fallbackr9  r  r  r   rL  _libraryr  mutates_and_returns_first_argr/  r  
is_mutabler   r  rC  r7  
zip_schema)rU  r*  r+  r  nontensor_argsr}  r   r?  schemar   r	  r  r   rk  s   `            rv   r  zFallbackKernel.__init__  s    	+.!	 	 	
 %*!!2

%%

..
 	X 14<.W	X 
 ","Nb	d556 '))+d&&

(F(FG !1!1!6!6!88
 !!)) >>==d>N>NO&&{1~'>'>'@A%;F%C%9&B  **4;;8J8JKf	: --88vN 	4ID#(s3	4rx   c                ,   t         |          }| j                  t        j                  j
                  j                  u rT| j                  D ]E  }t        |t              s|j                  t        j                  |j                                     }G |S rq   )ri  r  r<  r   _prims	rng_primsgraphsafe_run_with_rng_stater7  rr   r  	with_readr7   rM  r  )rU  r3  r   rk  s      rv   r  zFallbackKernel.get_read_writes  sy    g-/u||55RRR)) c>2"-"7"7$,,S\\^<#K rx   c           	     n    |j                  | j                         | j                  t        | dd             S Nr?  )(codegen_unbacked_symbol_defs_for_outputsr  r  r   r[  s     rv   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defs'  s0    ??MMOT\\749Ld+S
 	
rx   c                    t        | dd       x}rBt        t        j                  j                  j
                  |      }|J |j                         S t               S r	  r   r+   r`   r   r   r   rJ  r0   rU  r?  resolveds      rv   rL  z'FallbackKernel.get_unbacked_symbol_defs,  Z     '.A4 HHH0  **,=H '''==?"<rx   c                ,   t         j                   G d d             }| j                  D cg c]  } ||j                                }}| j	                  || j
                        \  }}t        j                  j                  rt        | j                  t        j                  j                        r| j                  ||      }t        | j                  j                   j"                  |      D cg c]9  \  }}t        j                  j$                  j'                  ||j(                        ; }}}n6|D cg c]+  }t        j                  j$                  j'                  |      - }}| j*                  j-                  |       |S c c}w c c}}w c c}w )Nc                       e Zd ZU ded<   ddZy))FallbackKernel.codegen_args.<locals>.Shimr   refc                    | j                   S rq   )r	  r[  s    rv   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__;  s    xxrx   Nr  )r   r   r   r   r  r   rx   rv   Shimr	  7  s    H rx   r	  )r-  	dataclassrC  r  r}  r7  r`   r   r`  rr   r<  r   rJ  rK  r  r   rL  rM  r  r  rO  r   r  )rU  r	  ru   r  r   r   params          rv   r  zFallbackKernel.codegen_args6  sH   				  	  
	  =AKKHqtA//12HH**;8J8JKf77:d.>.>

@U@U#V..tV<D !$D$4$4$<$<$F$F ME1 $$33AuGD 
 EIIqAGG((77:IDI 	6" I
 Js   F1>F70Fc                   | r!| D cg c]  }t        |t              r| c}nd }|r3| D cg c]#  }|j                         s|j                         % }}|d   S t        |t        j                        r|j
                  S t        |t        t        f      r\t        d |D              }|D cg c]  }|s|	 }}t        |      dk(  r|d   S |D ]  }t        |j                        s|c S  |d   S y c c}w c c}w c c}w )Nr   c              3  H   K   | ]  }t         j                  d |        y wrq   )r  find_devicer'  s     rv   r  z-FallbackKernel.find_device.<locals>.<genexpr>Z  s"      $89**43$re  r5   )rr   r  r   r   r  r   r   r   r0   r   rW   r   )r  r  r   non_torch_bind_tensor_argsr   devices
device_setr   s           rv   r#	  zFallbackKernel.find_deviceM  s     $J1:a+IQJ 	#
 &3>SC#..BRs~~'SGS1:nell3!(((ntUm4# $=K$ J -7A&&vAGA7|q qz!! "&++&!M" 1:+ K
 T Bs!   C4C4C9C9-C>5C>c                    t        | j                  t        j                  j                        ryt        | j                        j                         S r  )rr   r<  r   rJ  rh  r$   r	  r[  s    rv   r  zFallbackKernel.has_side_effectsg  s9    d&&

(F(FGt//0;;==rx   c                    | j                   S rq   )r  r[  s    rv   r  z+FallbackKernel.get_inputs_that_alias_outputl  rm  rx   c                N    t        | j                        dk  sJ | j                  S r~  )r   r  r[  s    rv   r  z!FallbackKernel.get_mutation_nameso  s'    4&&'1,,,"""rx   c           	        t         j                  d| j                         | j                         t	        | t
              sJ | j                  | j                  | j                        \  }}| j                  ||      }| j                  D cg c]  } | j                  |fi | }}| j                  }t        j                  j                  sg ||S t        dd      }|j!                  |||      }d }t	        |t"        j$                  j&                  j(                        r#|j+                  |d   |d         j,                  }	n|j.                  j,                  }	t1        |	      dk(  r>| j2                  r| j2                  n| j4                  }
|	d   j6                  } |||
      g}n9t9        |	| j2                        D cg c]  \  }} ||j6                  |       }}}t;        | j                         t=        j>                  | j                  jA                         ||i             }t        j                  jB                  jE                  |       g ||S c c}w c c}}w )a  
        ProxyExecutor Design Note
        We export the ExternFallbackNodes (for custom ops) into a serialized file
        and run it with a host side proxy executor to address the ABI problem
        This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
        Detailed design doc can be found at
        https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
        z4Extern kernel node added for node %s with target %s.Nc           	     D   t        | t        j                  t        j                  f      r|}t        |t        t
        f      rt        |      dk(  sJ |d   }t        | t        j                        rBt        j                  j                  t        j                  |j                                     S |J t        j                  j                  d      S t        | t        j                        rxt        | j                         t        j                        rPt        j                  j                  |D cg c]&  }t        j                  |j                               ( c}      S t        | t        j                        rt        | j                         t        j                        r|>t        j                  j                  t        j                  j                  d            S t        j                  j                  t        j                  j                  t        j                  |j                                           S t        | t        j                         r t        j                  j                  |	      S t#        d
t%        |              c c}w )Nr5   r   r_  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )rr   r   
TensorTypeNoneTyper   r   r   export_schemaArgumentry  TensorArgumentr  r  getElementTypeOptionalTypeOptionalTensorArgumentIntTypeRuntimeErrorr   )return_typer7  r   s      rv   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  s-   +(8(8%..'IJftUm4v;!+++ )Ck5+;+;<(1188"/">">CLLN"S 9   ;&;(11888FFK8Z**,e.>.>> %--44 $*  &44#,,.I  5   K););<**,e.>.>B >(1188+8+O+O+V+V$( ,W , 9   )1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K7$--44F4CC"%=d;>O=P#QRR5 s   7+Jr   r5   )r  rC  r  metadata)r   r   )#rE  rF  r  r<  rr   r  r}  rC  r7  r  r;  r  r`   r   aot_moder   serialize_inputsr   r  	torchbindCallTorchBindr
	  returnsrL  r   r  r@  rO  r   r  r3	  rb   r   extern_kernel_nodesr/  )rU  r   r   r(  ordered_kwargsr  
serializernamed_argumentsr<	  rB	  r  r;	  output_argumentsreturn_schemar7  r   s                   rv   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_nodes  s0    			BMMO	
 $///**4;;8J8JKf**48 99
 "D!!#00
 
 !!ww+T+N++*46
$55fdFK-	S^ fe55??MMNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[' JK .1$,,-G )M6 %]%<%<fE   
  ##'',,.&(	
 	
##**40''''w
T s   IIc                     j                   }|j                  dk(  rqt        |t        j                  j
                        sJ t        j                  j                  rddl	m
} t        |      |vrt        j                  d|       d _        np|j                  dk(  r&t        |t        j                  j
                        s=J t        j                  j                  r!|t        j                   j"                  v _        t        j                  j                  rt        |t        j                  j
                        r j                  sdfd j%                   j&                   j(                        \  }t+        j,                  | fd j.                  D              }t1        fd	t3        ||j4                  j6                        D               _         j9                  |        j                  rn j;                         }|j=                   j?                          j@                   fd
 j                   | jB                  r jB                  n jD                         nM|jG                          t         jH                  tJ              r" jM                  |        jO                  |        jQ                  |       y )Nr^  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                    t        | t        j                        r | j                               S t        | t        j                        S rq   )rr   r   r7	  r6	  
NumberType)r   	is_numbers    rv   rO	  z)FallbackKernel.codegen.<locals>.is_number  s:    a!3!34$Q%5%5%788!!U%5%566rx   c              3  D   K   | ]  } j                   |fi   y wrq   )r  )r  r  r   rU  s     rv   r  z)FallbackKernel.codegen.<locals>.<genexpr>  s+       *D))!6v6s    c              3  l   K   | ]+  \  }}t        |t              xr  |j                         - y wrq   )rr   complexrO  )r  rS  r  rO	  s      rv   r  z)FallbackKernel.codegen.<locals>.<genexpr>  s5      ,Aq 1g&A9Q[[+AA,s   14c                 H    g  j                          j                         S rq   )r  r  r[  s   rv   r&  z(FallbackKernel.codegen.<locals>.<lambda>"  s$    F$++-F0C0C0EF rx   )r   ztorch.JitTyper   r   ))r<  ra  rr   r   rJ  rK  r`   r   r`  torchgen.aoti.fallback_opsrK	  r   rE  r  r	  r6   r  custom_ops_to_c_shimsr}  rC  r7  rd  r  r;  r  r   rL  rM  rY  rI	  ,generate_fallback_kernel_with_runtime_lookupr  r9  r  r@  generate_fallback_kernelr*  r  r  r  r	  )	rU  rV  r+  rK	  r   	args_iterexported_argsrO	  r   s	   `      @@rv   r\  zFallbackKernel.codegen  s   !!v%fejj&;&;<<<ww""Lv;&;; KKa 15D--fejj&;&;<<<WW   f11GGG % GG65::#8#89--7  ..t{{D<N<NOLD& "!??I ), ,	6>>+C+CD, )D%
 	W%$$ ::<M@@''F   $$2G2G ,,T2$++v.))'2..w7))'2rx   c           	         t        | j                  | j                  t        | j	                               t        | j                                     S rq   )r,  r   r   rM   r   r   )r7  s    rv   tensor_to_layoutzFallbackKernel.tensor_to_layout0  s9    MMLL%fkkm4%fmmo6	
 	
rx   c                    t         j                  f}||vrt        j                  j                  n	t               }|5    j                  |g|i |\  }}}}	}
d d d        t        d D               j                  |      }|sCt        |t        j                  j                  j                        rt        j                  d      }|  t        |      ||	
      n!|sJ d         t!        |      ||	
       fd |g       }t        |t"        t$        t&        f      r	|_        |S |g_        |S # 1 sw Y   xY w)Nc              3  2   K   | ]  }t        |        y wrq   )rf  r  s     rv   r  z(FallbackKernel.create.<locals>.<genexpr>J  s     !K,s"3!Kr(  r   rq  r  z"Not sure where to find device infoc                    t         t        t        f      r. t                fdt	        t                     D              S t         t              r: j                         D ci c]  \  }}| |t               |fgz           c}}S t         t        j                        rnt        j                               }t        j                  sst               s3t        j                   j"                  j%                  |j&                         |S t         t(              r S t         t        j*                        r j,                  j.                  S  J dt                d       y c c}}w )Nc              3  T   K   | ]  } |   t              |fgz          ! y wrq   )r   )r  r   generate_outputr4  r7  s     rv   r  zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>k  s5      $ $F1Iw4<:K9L/LM$r  zFallbackKernel output type z is not supported)rr   r   r   r   r   r   r   rX  r   r  MultiOutputr[	  r6    assume_unaligned_fallback_outputr]   r`   r   rg  r  r   rs   SymIntr   r  )	r7  r4  r(  ru  r6  rz  r`	  has_unaligned_inputpackeds	   ``   rv   r`	  z.FallbackKernel.create.<locals>.generate_outputi  sH   &4-0#tF| $"3v;/$   FD) %+LLN S g$v,9L8M.MNN  FELL1!((0 ;;*,V4GG--11#((;
FC(FELL1{{'''~ 1$v,?PQ~ 3s   +#E3)r^  *_fused_moving_avg_obs_fq_helper_functionalr`   r   rl  r   r  r  r#	  rr   r   r  r@	  rA	  r   r  r   r   r   r   r  )rz  r+  r   r   fake_incorrect_kernelscontextr  r  r  r}  r?  r   r  r`	  rd	  re	  s   `            @@@rv   ry  zFallbackKernel.create9  sj   "&"Q"Q!S!'/E!EAGG;= 	  	< #""6;D;F;!	< "!K{!KKn=*E++55CC
 \\%(F!&)"3F ???6!0"3F 	D "."5geT23$FN  &YFNc	< 	<s    E		Ec                     t         |          S rq   )ri  rS  rj  s    rv   rS  zFallbackKernel.apply_constraint  s    w'))rx   rq   r  r$  rS  r,  )r7  r  )r   r   r   r  r  r  r	  rL  r  r/  r#	  r  r  r  rI	  r\  r[	  r  ry  rS  r  r  s   @rv   r  r    s     j4 j4 
j4X


 .  2>
 #m(^L3\ 
 
 V Vp* *rx   r  c                  <     e Zd ZdZddZddZdd	 d	 fdZ xZS )
ComplexViewz9View a complex number as two dtyped numbers or vice versac                     yr  r   r[  s    rv   r  zComplexView.should_allocate  r  rx   c                >    | j                   d   j                         gS r  r  r[  s    rv   r  z(ComplexView.get_inputs_that_alias_output  s    A'')**rx   Nr  c               0    t         |   ||||||       y )Nr  )ri  r  )rU  r*  r+  r  r		  r}  r?  rk  s          rv   r  zComplexView.__init__  s)     	/ 	 	
rx   r  r,  r  )r   r   r   r  r  r  r  r  r  s   @rv   rk	  rk	    s)    C+ 
 

 
rx   rk	  c                       e Zd ZU ded<   ddZy)r   r  r   c                    | j                   S rq   rq  r[  s    rv   r   zMultiOutputLayout.get_device  rr  rx   Nr  )r   r   r   r   r   r   rx   rv   r   r     s    rx   r   c                  X     e Zd ZddZ	 d	 	 	 	 	 d fdZ	 d	 	 	 d	dZd
dZddZ xZS )ra	  c                    |j                  |        | j                  s#| j                  |       | j                  |       y y rq   )codegen_multi_output!skip_size_stride_alignment_checksr  r  r[  s     rv   r\  zMultiOutput.codegen  s:    $$T*55%%g.**73 6rx   c                    t         |   d ||gd       t        j                  j	                  |       | _        t        j                  j                  |        || _        || _        y rC  )	ri  r  r`   r   r  r   r  r4  rt	  )rU  r*  r
  r4  rt	  rk  s        rv   r  zMultiOutput.__init__  sQ     	vw3GG++D1		""4(1R.rx   c                >    | j                   d   j                  |      S r  )rC  r  r  s     rv   r  z MultiOutput.get_free_symbol_uses  s     {{1~22=AArx   c                p    t        | j                        dk(  rt        | j                  d   t              ryy)Nr5   r   TF)r   rC  rr   r  r[  s    rv   r  zMultiOutput.should_allocate  s,    t{{q t{{1~'89rx   c                    | j                   D cg c]>  }t        |t              r,t        |j	                               dkD  r|j                         @ c}S c c}w r  )rC  rr   r  r   r  r  )rU  rE  s     rv   r  z(MultiOutput.get_inputs_that_alias_output  sN     {{
#~.C4467!; LLN
 	
 
s   AAr  r  )r*  r   r4  zlist[tuple[Any, ...]]r   r   r(  r  r,  )	r   r   r   r\  r  r  r  r  r  r  s   @rv   ra	  ra	    sY    4 +0SS '	S 
S %*B!B	!B

rx   ra	  c                     e Zd ZU dZded<   d,dZd-dZd.dZd/dZd0dZ	d1d	Z
d2d3dZd4dZd5dZd6dZd,dZd5dZ	 d7	 	 	 	 	 d8dZd9dZd:dZ	 d7	 	 	 	 	 d;dZd<dZd=dZd>dZd?dZd@dZdAdZd,dZd,dZdBdZdCdZd1dZdCd Z d@d!Z!	 d7	 	 	 dDd"Z"dEd#Z#dFd$Z$d2dGd%Z%e&dHd&       Z'dId'Z(dHd(Z)dJd)Z*e&d*        Z+d1d+Z,e,Z-y
)Kr  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    rn   r)  c                6    | j                   j                         S rq   r  r[  s    rv   r  z!MutableBox.has_exceeded_max_reads  r  rx   c                6    | j                   j                         S rq   r{  r[  s    rv   r   zMutableBox.get_device  ry  rx   c                6    | j                   j                         S rq   r"  r[  s    rv   r  zMutableBox.make_loader      yy$$&&rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.make_indexer  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.get_stride  ry  rx   c                6    | j                   j                         S rq   r~  r[  s    rv   r  zMutableBox.get_name  r  rx   Nc                8    | j                   j                  |      S rq   )r)  r  r  s     rv   r  zMutableBox.has_large_inner_fn  s    yy++I66rx   c                8    | j                   j                  |      S rq   r  r  s     rv   r  zMutableBox.mark_reuse  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.realize_hint  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.unwrap_view  r}	  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.is_input_buffer      yy((**rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.freeze_layout  s    yy&&((rx   c                :    | j                   j                  ||      S rq   )r)  r  r  s      rv   r  z*MutableBox.freeze_layout_with_stride_order  s     yy88NNrx   c                8    | j                   j                  |      S rq   )r)  r  r  s     rv   r  z(MutableBox.freeze_layout_with_fill_order  s    yy66u==rx   c                8    | j                   j                  |      S rq   )r)  r  r  s     rv   r  z(MutableBox.freeze_layout_with_same_order  s    yy66v>>rx   c                :    | j                   j                  ||      S rq   )r)  r  r  s      rv   r  z+MutableBox.freeze_layout_with_exact_strides  s     yy99-WWrx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.get_read_writes$  r	  rx   c                6    | j                   j                         S rq   rC  r[  s    rv   re  zMutableBox.get_reads'  r  rx   c                6    | j                   j                         S rq   rA  r[  s    rv   r  zMutableBox.num_reads*  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.get_storage_numel-  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.get_reduction_type0  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.get_reduction_size3  r  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.is_extern6  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.is_no_op9  r  rx   c                8    | j                   j                  |      S rq   r  r  s     rv   r  zMutableBox.constant_to_device<  s    yy++F33rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.get_mutation_names?  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  zMutableBox.get_operation_nameB  r  rx   c                6    | j                   j                         S rq   )r)  r  r[  s    rv   r  z'MutableBox.get_inputs_that_alias_outputE  s    yy5577rx   c                6    | j                   j                         S rq   r  r[  s    rv   r  zMutableBox.realizeH  r  rx   c                8    | j                   j                  |      S rq   rl  r  s     rv   r  zMutableBox.get_free_symbol_usesK  s     yy--m<<rx   c                6    | j                   j                         S rq   r  r[  s    rv   rf  zMutableBox.get_read_namesP  r  rx   c                6    | j                   j                         S rq   )r)  ro  r[  s    rv   ro  zMutableBox.get_defining_opS  r	  rx   c                8    | j                   j                  |      S rq   )r)  r  r  s     rv   r  zMutableBox.codegen_referenceV  s    yy**622rx   c                6    | j                   j                         S rq   r)  r  r[  s    rv   r*  zMutableBox.layoutY  s     yy((**rx   c                6    | j                   j                         S rq   rx  r[  s    rv   r   zMutableBox.get_layout^  ry  rx   c                6    | j                   j                         S rq   r	  r[  s    rv   r  zMutableBox.get_output_speca  r	  rx   c                6    | j                   j                         S rq   r  r[  s    rv   r   zMutableBox.get_sized  r  rx   c                .    | j                   j                  S rq   )r)  r   r[  s    rv   r   zMutableBox.dtypeg  s    yyrx   c                t   t        | j                  t              rQt        |       j                   dt        | j                        j                   d}d}| j                  j                  }n&t        |       j                   d}| j                  }d}|t        t        |            |g}dj                  |      S )Nrx  z))r   
)rr   r)  r  r   r   rz  r   r{  )rU  line0endlrr  r|  s        rv   ro  zMutableBox.__str__k  s    dii,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyrx   r  r  r  r  r  r  rq   r  r  r  r  r  r  r   r!  r#  r$  r%  r&  r'  r  r)  r+  r,  r(  r  r  r  r  r  r  ).r   r   r   r  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  re  r  r  r  r  r  r  r  r  r  r  r  r  rf  ro  r  r2  r*  r   r  r   r   ro  r  r   rx   rv   r  r    sO    L2&'(&$7+('+) 7<OO/3O	O
>? DIX+X<@X	X
+%%-..%$4..8# %*=!=	!=
*+3 + +&+$   " Hrx   r  c                      e Zd Zed        Zy)rm   c                N    t        | t              r| S t        t        |             S rq   )rr   r   rm   r_  )r)  s    rv   ry  zTensorBox.create  s"    d12KD)**rx   N)r   r   r   r/  ry  r   rx   rv   rm   rm     s    + +rx   c                  F    e Zd Zd
dZd ZddZddZd
dZd ZddZ	d Z
y	)r_  c                    t        | j                  t        t        f      r4| j                  j	                         t
        j                  j                  v S yr  )rr   r)  r%  r.  r  r`   r   r  r[  s    rv   r  zStorageBox.is_input_buffer  s:    dii+!?@99%%'177+?+???rx   c                    t        | j                  t              xr4 | j                  j                         t        j
                  j                  v S rq   )rr   r)  r  r  r`   r   r  r[  s    rv   r  zStorageBox.is_module_buffer  s9    tyy>3 :		""$(9(99	
rx   c           	        t        | j                  t        t        t        t
        t        f      r| j                  j                         S t        | j                  t        t        t        t        f      sJ t        | j                               | j                  j                         }| j                  j                         }t        d t        | j                  j!                         | j                  j#                         | j                  j%                               | j                        | _        t&        j(                  j+                  | j                        | j                  _        t&        j(                  j/                  | j                         | j0                  | j                  _        || j                  _        || j                  _        | j                  j,                  S )Nr#  r$  )rr   r)  r+  r  r%  r.  r  r  r  r  r  r6  r   rl  ri  r   r   r   r   r`   r   r  r   r  rK  rO  rM  )rU  rO  rM  s      rv   r  zStorageBox.realize  sO   II	
 99%%''$))iD$%GH 	
$IIK
 	
H ii//1II++-	"!yy++-ii))+YY'')
 
	 00;			""499- LL		 +		'		yy~~rx   c                    t        | j                  t        t        f      r9| j                  j	                         j
                  dkD  r| j                          yyy)zL
        Called on buffers we expect to be forced to realize later.
        r5   N)rr   r)  r  r  r  nontrivial_read_countr  r[  s    rv   r  zStorageBox.realize_hint  sF    
 tyy9i"89		**,BBQFLLN G :rx   c                    t        | j                  t              xr3 | j                         t        j
                  kD  xs | j                         S rq   )rr   r)  r  r  r6   realize_acc_reads_thresholdr  r[  s    rv   r  z!StorageBox.has_exceeded_max_reads  s@    $))Y/ 
NNvAAA )&&(	
rx   c                F   |dkD  rt        | j                  t        t        f      r{t	        | j                        r3| j                  j                         ddg}t        fd|D              ry| j                         t        j                  kD  xs | j                         S y)zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r5   expsigmoidc              3  :   K   | ]  }|j                   v   y wrq   )used_ops)r  ru   opcounts     rv   r  z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>  s     @qG,,,@s   TF)rr   r)  r  r  r
  r  r  r  r6   realize_reads_thresholdr  )rU  r  	heavy_opsr	  s      @rv   should_realize_on_reusez"StorageBox.should_realize_on_reuse  s    
 19DII	9/EFdii ))446"I.	@i@@ 6#A#AA -**, rx   c                H    | j                  |      r| j                          y y rq   )r	  r  r  s     rv   r  zStorageBox.mark_reuse  s    ''.LLN /rx   c                6    | j                   j                         S rq   rA  r[  s    rv   r  zStorageBox.num_reads  r  rx   Nr  r  r  r  )r   r   r   r  r  r  r  r  r	  r  r  r   rx   rv   r_  r_    s+    

B
$%rx   r_  c                  0    e Zd ZU ded<   ded<   dZded<   y)Subgraphr   r   rM  graph_moduleNzOptional[GraphLowering]r   )r   r   r   r   r   r   rx   rv   r	  r	    s    
I&&%)E")rx   r	  c                    | D cg c]$  }t        |t              r|j                         n|& } }t        t	        d | D                    t        |       k  S c c}w )Nc              3  2   K   | ]  }t        |        y wrq   )r"  )r  ra  s     rv   r  z'_has_aliased_buffers.<locals>.<genexpr>  s     ;"V*;r(  )rr   r.  r  r   r0   )buffersra  s     rv   _has_aliased_buffersr	    s^      !+6? COG 
 z;7;;<s7|KKs   )Ac                  z     e Zd ZU dZdZded<   dZded<   dZded<   	 	 	 	 	 	 	 	 d fd	Ze	dd
       Z
ddZ xZS )InvokeSubgraphz.
    Ir node for the invoke_subgraph HOP.
    NOptional[Subgraph]r<  zOptional[list[TensorBox]]operandsOptional[list[MultiOutput]]r  c                    t         |   d ||       || _        t        j                  j                  |       | _        t        j                  j                  |        y rB  )ri  r  r<  r`   r   r  r   r  )rU  r<  r	  r*  rk  s       rv   r  zInvokeSubgraph.__init__  sQ     	 	 	

 !GG++D1		""4(rx   c                6   ddl m} t        j                  j                  }d }|j
                  j                  d      x}r	|d   dd  }n+|j                  dd  }|D cg c]  }|j
                  d    }}|D cg c]  }| j                  |       }}g }	t        |      D ]B  \  }
}t        |t              r|	j                  |       (|	j                   ||||
                D |	}|j                  |t        j                  j                  |j                  ||j                        |_        t        j                   |j                        5   |j                  j"                  |  d d d        |j                  j$                  }d }|D ]$  }t        |t              r|j'                         } n |J t)        ||t+        |      	      dfd
}t        |      D cg c]  \  }} |||       }}}|_        |S c c}w c c}w # 1 sw Y   xY wc c}}w )Nr5   )constrain_to_fake_tensoreager_input_valsr   r   ru  r>  r:  rB  rq  )r<  r	  r*  c           
        t        | t        t        f      r| S t        t	        | j                         | j                         | j                         | j                         | j                         j                        t        |fgd      S )Nr  T)rt	  )rr   r   r0  ra	  r,  r   r   r   r  r   r-  r   )r7  indinvoke_subgraphs     rv   create_outputz,InvokeSubgraph.create.<locals>.create_output@  s    &#8:N"OP"%002$..0#__.%002%00299 $C[M6: rx   )r7  rn   r	  rs   )r  r	  r`   r   r  r<  r  r   r  r   rr   r   r/  r;  r	  r   r@  rA  graph_outputsr   r	  r   r  )rz  r<  r	  r	  r  fake_operandsr	  fx_operandsru   new_operandsr   operandr  r   r	  r   r7  r	  s                    @rv   ry  zInvokeSubgraph.create  s   6 ww+++00445GHHH,Q/3M '++AB/K4?@qQVVE]@M@
 3;;QC%%a(;;%h/ 	LC'#89##G,##,WmC6HI		  >>!WW22((,&mm 3 HN
 $$X^^4 3"""M23 ....  	Gg'<= ++-	 !!!($F3
	" >Gw=OP	6=+PP")A A
 <(3 3J Qs   G?9HH	 H	Hc                &    |j                  |        y rq   )codegen_invoke_subgraphr[  s     rv   r\  zInvokeSubgraph.codegenU  r&  rx   )r<  r	  r	  zlist[TensorBox]r*  r   r   r   )r<  r	  r  )r   r   r   r  r<  r   r	  r  r  r  ry  r\  r  r  s   @rv   r	  r	    sl     $(H '*.H'.+/G(/
) 
),;
)EV
)	
) N N`.rx   r	  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	e
	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )ConditionalNr  	predicate7Optional[list[Union[TensorBox, ShapeAsConstantBuffer]]]r	  r	  true_subgraphfalse_subgraphr	  r  c                    || _         || _        || _        || _        t	        |g|z         \  }}t
        	|   d |||       ||| _        t        j                  j                  |       | _        t        j                  j                  |        y N)r   r*  rC  r7  )r	  r	  r	  r	  _split_by_sym_typeri  r  r?  r`   r   r  r   r  )
rU  r	  r	  r	  r	  r*  r?  sym_argsr  rk  s
            rv   r  zConditional.__init__a  s     # *, 2I;3I J+"	 	 	
 (%6D"GG++D1		""4(rx   c                   | j                  |      }|D cg c]  }| j                  |       }}t        j                  j                  j                  d   }|D cg c]  }|j
                  d    }}||fD ]  }|j                  t        j                  j                  |j                  ||j                        |_        t        j                  |j                        5   |j                  j                  |  d d d         |j                  j                  }	|j                  j                  }
d|	fd|
ffD ]!  \  }}t        |	      st        d| d|        t        |	      t        |
      k(  s	J |	|
f       t        t!        |	|
            D ]  \  }\  }}|j#                         |j#                         k(  s
J |||f       |j%                         |j%                         k(  s
J |||f       |j'                         j(                  |j'                         j(                  k(  rJ |||f        t+        d |g|z   D              }t-        t        j                  j.                  j0                  t        j                  j                  j
                  j3                  d	d             }|J d
       t5        ||||t7        |      |      }dd}t        t!        |	t        j                  j                  j
                  d               D cg c]  \  }\  }}t9        t;        |j#                         |j%                         |j=                         D cg c]
  } ||       c}|j?                         D cg c]
  } ||       c}|j'                         j(                        |t@        |fg       }}}}}||_!        |S c c}w c c}w # 1 sw Y   axY wc c}w c c}w c c}}}}w )Nr  ru  r	  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  \   K   | ]$  }t        |t              s|j                          & y wrq   )rr   r   r   )r  os     rv   r  z%Conditional.create.<locals>.<genexpr>  s)      
a!67 LLN
r  r?  zcannot determine devicerq  )r	  r	  r	  r	  r*  r?  c                R    t        | t              r| S | j                  j                  S rq   )rr   rs   r   r  )r   s    rv   _maybe_exprz'Conditional.create.<locals>._maybe_expr  s    !S!66;;rx   r  )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.expr])"r  r`   r   r  r   r<  r;  r	  r   r@  rA  r	  r	  r  r   r   r   r   r   r   r-  rs  r+   r   r   r  r	  r   ra	  r,  r   r   r   r  )rz  r	  r	  r	  r	  ru   r	  r	  r<  true_outputsfalse_outputsr   r  r   t_of_or   r?  conditionalr	  r7  merged_outputr  s                          rv   ry  zConditional.create}  s    %%i0	2:;QC%%a(;;gg**//30;<1<< (+ 		7H~~%!"!6!6,,#0"*-- "7 "
 ((8 7&HNN&&67 7		7 }}22 44(,7*m9TU 	MD'#L1$**./TU\T]_ 	 < C$66U}8UU6&s<'GH 	UMAzS>>#s~~'77F!S#F7==?cmmo5D3}D5>>#**cnn.>.E.EET3PS}TE	U
  
[8+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
	& /8L!''"6"6";";E"BC/
 
 +*FM !,,. **,4A4F4F4HIb+b/I6C6J6J6LMKOM!,,.55 

 
& &Y <<7 7b JM
s<   OO	!OAO%
O#O%
7O 0O%
O	
O%
c           	         |j                  |        |j                  | j                         | j                  t	        | di              y r	  )codegen_conditionalr	  r  r  r   r[  s     rv   r\  zConditional.codegen  s9    ##D)88MMOT\\749Lb+Q	
rx   c                    t        | dd       x}rBt        t        j                  j                  j
                  |      }|J |j                         S t               S r	  r	  r	  s      rv   rL  z$Conditional.get_unbacked_symbol_defs  r	  rx   )r	  rn   r	  -list[Union[TensorBox, ShapeAsConstantBuffer]]r	  r	  r	  r	  r*  r   r?  z,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   )r	  rm   r	  r	  r	  r	  r	  r	  r  rS  )r   r   r   r	  r   r	  r	  r	  r  r  r  ry  r\  rL  r  r  s   @rv   r	  r	  Y  s    "&I&HLHEL(,M%,)-N&-+/G(/)) @)  	)
 !) ") H) 
)8 TT T 	T
 @T Tl
 rx   r	  c                    g }g }| D ]?  }t        |t              r|j                  |j                         /|j                  |       A ||fS rq   )rr   r   r/  r  )r   non_sym_argsr	  r   s       rv   r	  r	    sS     LH %c01OOCHH%$	% \!!rx   c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZded	<   	 	 	 	 	 	 	 	 	 	 	 	 d fd
Z	e
	 	 	 	 	 	 	 	 dd       ZddZ xZS )	WhileLoopNr	  carried_inputsadditional_inputsr	  cond_subgraphbody_subgraphr	  r  c                   || _         || _        || _        || _        t	        ||z         \  }}t
        |   d |||       t        j                  j                  |       | _
        t        j                  j                  |        y r	  )r	  r	  r	  r	  r	  ri  r  r`   r   r  r   r  )	rU  r	  r	  r	  r	  r*  r	  r  rk  s	           rv   r  zWhileLoop.__init__  s     -!2** 2>DU3U V+"	 	 	
 GG++D1		""4(rx   c                (   ddl m} 	 	 	 	 	 	 dd}t        j                  j                  j
                  d   }t        j                  j                  j
                  d   }||z   }	|	D 
cg c]  }
|
j                  d    }}
|D 
cg c]  }
|
j                  d    }}
|D 
cg c]  }
|
j                  d    }}
|D 
cg c]  }
| j                  |
       }}
 |||      }|D 
cg c]  }
| j                  |
       }}
 |||      }||z   }||fD ]  }|j                  t        j                  j                  |j                  |	|j                        |_        t        j                  |j                        5   |j                  j                  |  ||u rYt        |j                  j                        t        |      k(  sJ  ||j                  j                  |      |j                  _        d d d         |j                  j                  }|j                  j                  }t        |      rt!        d|       t        |      d	k(  sJ |       |d   }t#        |t$              sK|j'                         t(        j*                  k(  sJ |       t        |j-                               dk(  sJ |       t        |      dkD  sJ d
       |d   j/                         }|J t        |      t        |      k(  s	J ||f       t1        t3        ||            D ]  \  }\  }}	 	 	 	 	 	 dd} ||j-                         |j-                                 ||j5                         |j5                                |j/                         |j/                         k(  sJ ||||f       |j'                         |j'                         k(  s
J |||f       |j7                         j8                  |j7                         j8                  k(  rJ |||f        t;        ||||t=        |            }|j                  8t#        |j                  j>                  t(        j@                  jB                        sJ  ||j                  j>                  |      d   }tE        |      }|D cg c]  }||   	 }}t1        |      D ci c]  \  }}||vr|| }}}|jG                         D cg c]w  \  }}tI        tK        |j/                         |j'                         |j-                         |j5                         |j7                         j8                        |tL        |fg      y }}}||_'        |D  cg c]  } tQ        | jR                  | |       c} |_*        tW        |      }!tW        |      }"tY        t        |            D cg c]  }||v rt[        |"      n
t[        |!       }#}t3        ||#      D ]g  \  } }| j]                         t        j                  j^                  v s1t        j                  j`                  jc                  |j]                                i |#S c c}
w c c}
w c c}
w c c}
w c c}
w # 1 sw Y   gxY wc c}w c c}}w c c}}w c c} w c c}w )Nr   )check_input_alias_and_mutationc                &   t        |       t        |      k(  sJ g }t        | |      D ]f  \  }}t        |t        j                        r6|j                  t        j                  ||j                         d             V|j                  |       h |S )NFr^  )	r   r   rr   r   r  r/  r  r  r   )tensor_boxesfake_tensorsretr{  fks        rv   _require_exact_stridesz0WhileLoop.create.<locals>._require_exact_strides  s     |$L(9999ClL9 #Bb%,,/JJ$::		5 ;  JJrN# Jrx   r  ru  r	  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r5   z9torch.while_loop is assumed to have at least one operand.c                    t        | |      D ]/  \  }}t        j                  j                  j	                  ||       1 y rq   )r   r`   r   r   r  )	lhs_exprs	rhs_exprslhsrhss       rv   _guard_list_equalsz,WhileLoop.create.<locals>._guard_list_equalsq  s8     !$Iy 9 <HCGG$$11#s;<rx   rq  )r	  r	  r	  r	  r*  r   r  )r	  'list[TensorBox | ShapeAsConstantBuffer]r 
  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r
  )r
  Sequence[Union[int, Any]]r
  r
  r   r   )2torch._higher_order_ops.utilsr	  r`   r   r  r   r<  r  r;  r	  r   r@  rA  r   r	  r	  r  rr   r   r   r   r   r   r   r   r   r  r   r-  r	  r   modulefxGraphModuler0   rX  ra	  r,  r   r  r  r*  r@  rr  r   rs  r  r  r  r  )$rz  cond_fnbody_fnr	  r	  r	  r
  fx_carried_inputsfx_additional_inputsfx_all_inputsru   fake_all_inputsfake_carried_inputsfake_additional_inputs
all_inputsr<  cond_outputsbody_outputsrl  r   r   r8  bor

  
while_loopmutated_idxsmutated_idx_setr   r  r   real_outputsr7  rE  outputs_itermutated_inputs_iterall_outputss$                                       rv   ry  zWhileLoop.create  s|    	Q	A	F	 5	" GG0055b9 ww3388<),@@2?@Q166%=@@6GHqvve}HH9M!NA!&&-!N!N8FG1#++A.GG/@ST;LMaS..q1MM25
 $&77
 '* 	H~~%!"!6!6,,#0"*-- "7 "
 ((8 &HNN&&8  7*"8>>#?#?@C/E      8N$NN88/84 	2 }}22}}22- XXdWeg  < A%3|3%O!23;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!>"c,&77W.,9WW7$S%FG 	QKAxB<4<4< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3==?))R]]_-C-CCPaR[PC	Q" )/!!$F3

 }}(ZMM  %(("6"6.
 	
 

 6MM  /

 %\25DEc*S/EE &l3
S/) H
 
"  ,113
 V !,,. **,*!,,.!,,.55 

 
 *
 &'
 3::sJ7'

#
 L)">2 S./
 *-)?D$%T,EWW
 
 NK8 	@HC||~!5!55 ++//?	@ S AH!NGM Z F


'

sJ   (YY Y<Y"#Y'9A7Y,Y95Y>A<Z&Z
8!Z,Y6	c                &    |j                  |        y rq   )codegen_while_loopr[  s     rv   r\  zWhileLoop.codegen  s    ""4(rx   )r	  r	  r	  r	  r	  r	  r	  r	  r*  r   r   r   )r
  r	  r
  r	  r	  r	  r	  r	  r  )r   r   r   r	  r   r	  r	  r	  r  r  r  ry  r\  r  r  s   @rv   r	  r	    s    NRNKRQUNU(,M%,(,M%,+/G(/)E) I)  	)
  ) ") 
)0 ff f F	f
 If fP)rx   r	  c                  @     e Zd Z	 ddd	 d fdZd fdZd	dZ xZS )
r   Nr  c          	     j   t         |   |||||d |       ddlm} |D 	cg c]   }	t	        |	t
              r|	j                  n|	" }
}	 ||g ||
|      }|J || _        t        j                  j                  j                  |d       | _        | t        j                  j                  |<   y c c}	w )N)r   r?  r   )get_effect_key)ri  r  torch._higher_order_ops.effectsr(
  rr   r  r   effect_typer`   r   effectful_opsr  prev_effect_buffer)rU  r*  r+  r  r		  r}  r   r?  r(
  r  uncovered_argsr*
  rk  s               rv   r  zEffectfulKernel.__init__  s     	/ 	 	
 	C GR
ABz!_5AGG1<
 
 %V-O~-O-OQWX&&&&"#''"7"7";";K"N-1k*
s   %B0c                    t         |          }| j                  F|j                  j	                  t        j                  | j                  j                                      |S rq   )ri  r  r,
  r  r  r7   rM  r  )rU  r3  rk  s     rv   r  zEffectfulKernel.get_read_writes  sU    g-/"".!!$$T%<%<%E%E%GH rx   c                     yr  r   r[  s    rv   r  z EffectfulKernel.has_side_effects  rp  rx   rq   r  r$  r  )r   r   r   r  r  r  r  r  s   @rv   r   r     s,     2 2 
2@rx   r   c                      e Zd Zy)r  Nr4  r   rx   rv   r  r    s    rx   r  c                  L    e Zd ZU ded<   ded<   ddZdddZddZdd	Zdd
Zy)r  r   r   +Union[FakeScriptObject, torch.ScriptObject]r   c                    | j                   S rq   r_  r[  s    rv   r  zTorchBindObject.get_name  r  rx   Nc                    | j                   S rq   r_  r  s     rv   r  z!TorchBindObject.codegen_reference  r  rx   c                    | j                   S rq   r   r[  s    rv   r  zTorchBindObject.get_value  r  rx   c                    t        | j                  t        j                        r| j                  S | j                  j                  S rq   )rr   r   r   ScriptObjectreal_objr[  s    rv   get_real_objzTorchBindObject.get_real_obj   s0    djj%"4"45::::&&&rx   c                h   | j                         }t        |j                               }t        j                  |      d   }|D cg c]=  }t        |t        j                        r!|j                         |j                         z  ? }}t        j                  t        j                  |d      S c c}w r  )r9
  r   __obj_flatten__rt  r  rr   r   r  r-  numelrA  rc  operatorr  )rU  real_script_obj	flat_dict
flat_elemsru   
flat_sizess         rv   get_buf_byteszTorchBindObject.get_buf_bytes   s    ++-88:;	((3A6
  
!U\\* NNqwwy(

 

 j!<<
s   AB/r  rq   r  )r   r2
  )r   ztorch.ScriptObjectr&  )	r   r   r   r   r  r  r  r9
  rB
  r   rx   rv   r  r    s&    
I66'
=rx   r  c                  4    e Zd ZU ded<   ded<   ddZd	d
dZy)r  r   r   r  r   c                    | j                   S rq   r_  r[  s    rv   r  zGeneratorState.get_name   r  rx   Nc                    | j                   S rq   r_  r  s     rv   r  z GeneratorState.codegen_reference   r  rx   r  rq   r  )r   r   r   r   r  r  r   rx   rv   r  r     s    
Irx   r  c                  V    e Zd ZddZddZdd	dZe	 	 	 	 d
d       Ze	 	 dd       Zy)_CollectiveKernelc                     yr  r   r[  s    rv   r  z!_CollectiveKernel.should_allocate!   r  rx   c                     yr  r   r[  s    rv   r  z"_CollectiveKernel.has_side_effects$   rp  rx   Nc                H   t        | j                        t        j                  j                  u sJ d       | j                  }|j
                  j                  | _        |j
                  j                  D cg c]  }|j                  s|j                   c}| _
        y c c}w )Nz,Setting cpp kernel needs a valid op_overload)r   r<  r   rJ  rK  rL  r   r:  rM  rN  r;  )rU  r:  r+  ru   s       rv   rC  z%_CollectiveKernel.set_cpp_kernel_name)   s    D$$%)>)>> 	
:	
> !!%~~22 #NN44.
AFF.
* .
s   6BBc           
     *   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           |d   j                         } | t        |      ||      }t        j                  |      }|j                  j                  |D cg c]  }t        t        |      ||       c}       |j                  j                  |D cg c]  }|j                          c}       d|v r`|j                  j                  t        t        |      |d   |             |j                  j                  |d   j                                y y # 1 sw Y   GxY wc c}w c c}w )Nr  r   rq  r   )r`   r   rl  r  r  r   r  rt  tree_leavesr@  rp  r  r  r  r/  )rz  r+  rC  r   r   _example_outputr  r  r}  r?  
tensor_argr   re	  inpsr6  rE  s                   rv   create_inplacez _CollectiveKernel.create_inplace:   s    WW 	D #""66CDCFC!	D %E2C1D&EE$% 	!J 	! Q**,f%
 !!&)&&OST^Jf5sFCT	

 	!!T"Bc3<<>"BCF?##**z8&-P %%fUm&<&<&>? 9	D 	D. U #Cs   E>=F;F>Fc           
        t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           t        t              r| j                  ||      } | t        |      ||      }t        |      D cg c](  \  }}t        | j                  |      |t        |fg      * c}}|_        t        |j                  |      D ]T  \  }}t        j                  st!        |      r"t         j                  j"                  j%                  |j&                         V |j                  S  | | j                  |      ||      }t        j                  st!        |      s3t         j                  j"                  j%                  |j&                         |g|_        |S # 1 sw Y   xY wc c}}w )Nr  rq  )r`   r   rl  r  r  rr   r   r#	  r   r   ra	  r[	  r  r   r6   rb	  r]   rg  r  r   )rz  r+  rC  r   r   r  r  r  r}  r?  rN
  r   re	  r   r/  r6  s                   rv   create_out_of_placez%_CollectiveKernel.create_out_of_placew   s    WW 	D #""66CDCFC!	D %F3D2E&FF$% 	!J 	! nd+__[.AF!0F "+>!: Av ((0AYKFN  #6>>>B <V::BSC GG--11#((;	<
 >>!$$^4F 66>O? ))--fkk:$XFNMa	D 	D*s   G,-GGr  rq   r  )rC  !Union[TensorBox, list[TensorBox]]r   r   )rC  rS
  )	r   r   r   r  r  rC  r  rP
  rR
  r   rx   rv   rG
  rG
      sV    
	
" $@>$@	$@ $@x 3>3 3rx   rG
  c                  8     e Zd Zd Zedd       Zd fdZ xZS )_WaitKernelc                
   | j                   d   }t        |t              r|j                   d   gS t        |t              rC|j                   d   }t        |t              r"|j                  d   \  }}|j                   |   gS g S g S r  )rC  rr   rG
  ra	  r4  )rU  rE  collr   r   s        rv   get_volatile_readsz_WaitKernel.get_volatile_reads   s}    kk!nc,-JJqM?"[) ::a=D$ 12Q3C())I Irx   c                r   t         j                  j                  5  | j                  ||      \  }}}}}d d d        rJ | d|         | t	        |j                               |      }|j                  j                  t        t	        |j                               ||             y # 1 sw Y   zxY w)Nr  rq  )	r`   r   rl  r  r  r   r@  r/  r  )	rz  r+  rE  rM
  r  r  r}  r?  re	  s	            rv   create_waitz_WaitKernel.create_wait   s    WW 	0 ""63/!	0 %E2C1D&EE$cnn./
 	&&:S^^-=>VL	
!	0 	0s   B--B6c                    t         |          }| j                         }|D ]>  }|j                  j	                  t        j                  |j                                      @ |S rq   )ri  r  rX
  r  r  r7   rM  r  )rU  r3  volatile_readsvrrk  s       rv   r  z_WaitKernel.get_read_writes   sZ    g-/002  	GB!!,"6"6r{{}"EF	Grx   )rE  rm   r   r   r$  )r   r   r   rX
  r  rZ
  r  r  r  s   @rv   rU
  rU
     s&    * 
 
* rx   rU
  c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S rq   )rr   r.   r   r(   r   r   r0   r   r   r  r   r  r   r|  r   s      rv   r  r     s    !h%&$Q''	At}	%u||$& 	0A,Q//A	0	Au||	$$Q''|rx   c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S rq   )rr   r.   r   r'   r   r   r0   r   r   r  r   r  r_
  s      rv   r  r     s~    !h%&A	At}	%u||$& 	'A#A&&A	'	Au||	$A|rx   )ru   r   r   r   )r   r   r   r   )r   r   r   r  )r   Sequence[int]r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   r  )ru   r)   r   r   r   r|   rq   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   ra
  )r   Sequence[Union[int, Integer]]r   ra
  r   )ru   zLiteral[None]r   r   r   r   )ru   rn   r   r   r   r  )ru   r  r   r   r   zOptional[torch.Tensor])r   zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])ru   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r  )ru   z&Union[IRNode, torch.device, None, str]r   r   )ru   zUnion[Buffer, TensorBox]r  rs   r   r   )r  r  r  r  r  r  r   r   )r/  Union[TensorBox, BaseView]r0  z"Sequence[Union[int, torch.SymInt]]r   rc
  )r>  rM  r   r   )rC  rR  r   r  )r   zUnion[Expr, Sequence[Expr]]r   r  r   r_   )r  r   r   r  r  r   r   r  )ru   rn   r   r   )TFNFN)ru   rn   rR  r   r\  r   r]  'Optional[Sequence[Union[int, Integer]]]r  r   r  rd
  r   ztuple[StorageBox, Layout])ru   rn   r]  rb
  r   r   )r   rn   r   r   )r   r  r  r  r   r   )r   r  r   rs   )r	  r  r   r   )r   rN  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r  (-  
__future__r   r0  r-  rA  rd  loggingr=
  textwraprM  rr  collections.abcr   r   r   r   r   enumr	   r
   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   unittest.mockr   r   r   r   r   torch._export.serde.schema_exportserder
	  r3	  torch._library.utilsr	  r  r  torch._loggingr   torch.fxtorch.utils._pytree_pytreert  torch._dynamo.utilsr   torch._export.serde.serializer   *torch._higher_order_ops.auto_functionalizer   torch._inductorr   torch._prims_commonr   r    r!   r"   r#   torch._subclasses.fake_tensorr$   %torch.fx.experimental.symbolic_shapesr%   r&   r'   r(   r)   r*   r+   r,   r-   r.   torch.utils._ordered_setr0   torch.utils._sympy.functionsr1   r2   r3   torch.utils._sympy.symbolr4   rr  r6   r7   codegen.commonr8   r9   r:   r;   r<   r=   r>   r?   r@   	loop_bodyrA   ops_handlerrB   rC   rD   rE   runtime.benchmarkingrF   runtime.hintsrG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   virtualizedr^   r_   r`   "torch._library.fake_class_registryra   torch.fx.noderb   codegen.cuda.cuda_templaterc   r   rd   re   r   r   r  __version__r,  r+  ImportErrorrf   rg   rh   rs   ri   r  rj   	getLoggerr   rE  rz  r^  r   r   ro   rw   r	  r{   r   r   r   r   r   r   r  r  r   r   r   r   r   r   r  r
  r  r"  r5  r@  rF  rn   r4  rU  r  r  r  r  r   r  INNER_FN_TYr  r  r  r  r-  r6  r   rY  r+  rd  rf  rj  r   r  r  r  r  r.  r  r$  rA  rH  rO  rX  r\  r   r  r,  r   r  r  r  r  r  r`  r  r%  r(  r  r0  r   r+  r  r  r   r   PrimitiveInfoTyper  r  r  r  r  r  r  r  r  r  r  r2  r  r  r  r  r4  rP  r  r  r  r  r  r  r  r  r   r   r  r  rk	  r   ra	  r  rm   r_  r	  r	  r	  r	  r	  r	  r   r  r  r  rG
  rU
  r  r  r   rx   rv   <module>r
     s   "          9 9 :  
 
 
 = <   ' ' 2 2 , ,   $ $ ( ? M #  :   0 L L * "     N N - :     . * ) C"8$% %L)$''NJ T]T]T]CI&) &C,-) -g!			8??4	8yy~~'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 N 
 N 
 O 
 O .2&*8!%	>9	>	>;('0     
	.#G&#G/#G  #GL'x, x,v UH H HV ~
F ~
 ~
B& 
 
 
@ 
i 
 
F |$y!y!u=)< 8  JN<N<N +<NBF<N<N~ i
 i
 i
X 7AB7S9 7St#1 #L[
+ [
| D
5 D
 D
P 	 	 	 U5 U Up	 !<@=A999 9 :	9
 9 ;9 9x:	$ \
v \
 \
~ K K K\ (( ( (V 79( 79 79t (  : j; j jZ Rh R Rj % % %POA OAd 6  " K| K K$ S| S S'9	<7 7 QTZ QT QTh& $I7V I7XPf P2T $%{ $%N   .Q* Q*h UV]  D U&fi & & & 
K 
[ 
& 6  & F   UG4_ G4 G4T
B
_ B
JK> K\ #udCeCeT<Q6R1SST 6$ 6$r"| "
D=. D=N5 50( (& U>? > >B h9 hV UM< M M` U l    F
/ 
(&" &"RV .;L ;|%
 %
P
- 
$/
\ /
dul up&)| &)T-, -`< 6B5 B"F- F,H)l H)V,)| ,)^!T !TH-L -46;< 6;r U  
k*& k*\ U
. 
 
< 
  &
, &
V S S Sl+
 +T% T%n U*v * *L Ug.\ g. g.T UH , H  H V"
"2" UI) I) I)X,n ,^	6 	 =l = =@ \  K K\2# 2p oC  NJs   !e 	ee