
    rh                   
   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(Z)d dl*Z)d dl+m,c m-Z. d dl/m0Z0 d dl)m1Z1 d dl2m3Z3 d dl4m5Z5m6Z7mZ8m,Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZG d dlHm6ZI d dlJmKZK d dlLmMZMmNZNmOZO d dlPmQZQmRZRmSZS d dlTmUZUmVZVmWZWmXZX d dlYmZZZ d dl[m\Z\m]Z]m^Z^m_Z_m`Z`maZa d dlbmcZc d dldmeZemfZfmgZgmhZhmiZimjZjmkZkmlZlmmZm d dlnmoZo d dlpmqZq d dlrmsZs d dl*mtZt d d lumvZvmwZw d d!lxmyZy d d"lzm{Z{ d d#l|m}Z} d$d%l~mZ d$d&lmZmZ d$d'lmZ d$d(lmZ d$d)lmZ d*d+lm6Z6mZ d*d,lmZmZ d*d-lmZ d*d.lmZ d*d/lmZ d*d0lmZ d*d1lmZmZ d*d2lmZ d*d3lmZ d*d4lmZmZ d*d5lmZ d*d6lmZ d*d7l,mZmZmZmZmZmZmZmZmZ d*d8lmZ er d d9lmZmZ d d:l[mZ d d;lmZ d d<lmZ d*d=lmZ  e"d>      Z ed?      Zes e6jt                         s	dd@ZddAZnd dBlmZmZ er
d dClmZmZmZ  G dD dEej                        ZddFZ eī       \  ZZ ej                  eȫ      Ze)j                  j                  edG      Ze)j                  j                  edH      Ze)j                  j                  edI      Ze)j                  j                  edJ      Ze)j                  j                  edK      ZddLZddMZddNZddOZ ej                  d      ddP       Zej                  ddQ       ZddRZ	 	 	 	 	 	 	 	 ddSZ	 d	 	 	 	 	 ddUZ	 	 	 	 	 	 ddVZ	 d	 	 	 	 	 ddWZdddXZ	 	 	 d	 	 	 	 	 	 	 	 	 ddYZddZZ	 	 	 	 dd[Z	 	 	 	 	 	 dd\Z	 d	 	 	 	 	 	 	 dd]Z	 d	 	 	 dd^Zej                  dd_       Z G d` dae$dTb      Z G dc dde#      Z	 	 	 	 	 	 	 	 ddeZ edfg      	 	 	 	 	 	 	 	 ddh       Z G di dj      Z G dk dle      Z G dm dne      Z	 	 	 	 	 	 	 	 	 	 ddoZ	 	 	 	 	 	 ddpZ	 ddqdqdqdr	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddsZddtZ	 	 	 	 	 	 	 	 dduZ	 d	 	 	 	 	 	 	 ddvZedf	 	 	 	 	 	 	 	 	 ddwZ ed       Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddxZddyZddzZedddTf	 	 	 	 	 	 	 	 	 	 	 	 	 dd{Zdd|Z	 	 	 	 	 	 	 	 dd}Z	 	 	 	 	 	 	 	 dd~ZddZ	 ddd	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)ABCabstractmethod)defaultdict)AbstractContextManager)currentframe)count)
attrgetter)AnyCallableOptionalTYPE_CHECKINGTypeVarUnion)Neveroverride	ParamSpecProtocol	TypedDictUnpack)mock)#min_cut_rematerialization_partition)fx)enable_python_dispatcher)compiled_autogradconfigloggingutils)get_interface_for_device)wrap_compiler_debug)	chromium_event_timedCompileEventLoggercountersdetect_fake_modedynamo_timedflatten_graph_inputsget_metrics_contextlazy_format_graph_codeset_feature_use)r   )!unwrap_tensor_subclass_parameters)aot_export_modulemake_boxed_funcSerializableAOTDispatchCompiler)	code_hashFxGraphCacheoutput_code_log)BoxedDeviceIndexformat_default_skip_message#log_cudagraph_skip_and_bump_counterPlaceholderInfo)save_args_for_compile_fx_inner)CompiledAOTICompiledFxGraphCompiledFxGraphConstantsWithGmget_expanded_dimsindex_expanded_dims
OutputCode)	cache_dir)		BoxedBoolcount_tangentsfresh_cacheget_all_devices	InputTypeis_gpushould_assume_input_aligned should_use_remote_fx_graph_cachetensor_is_aligned)FakeScriptObject)trace_structured)compile_time_strobelight_meta)GraphModule)free_unbacked_symbolsSymExprPrinter)FakeTensorProp)_WaitCounter)
OrderedSet   )aot_autograd)ShortenTraceback	SkipFrame)_use_lazy_graph_module)_PyTreeCodeGen)
has_triton   )r   metrics)get_wrapper_codegen_for_deviceinit_backend_registration)DebugContext)select_decomp_table)InductorError)joint_graph_passes)post_grad_passesview_to_reshape)pre_grad_passes)GraphLowering)get_device_typeIRNode)complex_memory_overlap)TritonBundler)	align_inputs_from_check_idxsclone_preserve_stridescopy_misaligned_inputs get_cloned_parameter_buffer_name%get_first_incompatible_cudagraph_node#maybe_get_suppress_shape_guards_ctxoutput_noderemove_unaligned_input_idxsshape_env_from_inputs)V)	GeneratorSequence)_StrideExprStr)
OpOverload)Weights)ExternKernelNode_P_Tc                "    t         j                  S N)dynamo_utilsidentityattrs    m/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/compile_fx.pytime_and_logr      s    $$$    c                      y ry    )argskwargss     r~   log_optimus_to_scubar      s    r   )r   r   )FQNGraphInputNameGraphSignaturec                      e Zd ZdZdZdZy)FxCompileModer   rV   rO   N)__name__
__module____qualname__NORMAL	SERIALIZE
SUBPROCESSr   r   r~   r   r      s    F IJr   r   c                 F   d} t         j                  j                  |       }|t        j                  dfS d}|j                         j                  d      rd}|dd  }	 |j                         }t        |   |fS # t        $ r dd l	} |j                  t              }|j                  d|| dj                  t        d	 t        j                  j!                         D                           t         j                  j#                  |        t        j                  dfcY S w xY w)
NTORCHINDUCTOR_FX_COMPILE_MODEFzasync+T   r   z>Invalid value of %s for %s. Expected one of %s. Using default.z, c              3  2   K   | ]  }t        |        y wry   )repr.0xs     r~   	<genexpr>z+_fx_compile_mode_default.<locals>.<genexpr>   s     OT!WOs   )osenvirongetr   r   lower
startswithupperKeyErrorr   	getLoggerr   errorjoinsorted__members__keyspop)namevalue	use_asyncr   logs        r~   _fx_compile_mode_defaultr      s    *DJJNN4 E}##U**I{{})	ab	+U#Y.. +g)		LIIfOm.G.G.L.L.NOOP		
 	

t##U**+s   A: :B#D D 
perf_hintspre_grad_graphspost_grad_graphscudagraph_static_inputsinductor_metricsc                    t         j                  j                  j                         }t	        t        |             }|r|j                  s|S |j                  j                  S ry   )torch_guardsTracingContexttry_getlistrangefw_metadatastatic_input_indices)	num_fixedcontextfixeds      r~   get_static_input_idxsr      sM    
 mm**224Gy!"E'--333r   c                $   | j                   j                  d      d   }g }t        |j                  d   t        j
                  j                        s|j                  d   }n|j                  }|D ]  }t        |t        j
                  j                        rW|j                  j                  d      x}:t        |t        j                        r |j                  |j                                ~|j                  d         ||j                  d<   y )Noutputopr   valoriginal_output_strides)graph
find_nodes
isinstancer   r   r   Nodemetar   Tensorappendstride)gmrl   output_stridesoutput_node_argsr   r   s         r~   record_original_output_stridesr      s    ((%%%215KNk&&q)588==9&++A.&++" (vuxx}}-..;3-!!#**,/!!$'( 3AK./r   c                    | j                   j                  dt        j                  j                  j
                        D ]0  }t        | |j                  d   j                        }t        |       2 t        |        y )Ncall_functionr   targetr   )r   r   r   opshigher_orderinvoke_subgraphgetattrr   r   )_recursive_record_original_output_stridesr   )r   nodesubgraphs      r~   r   r      sh    ##599#9#9#I#I $  < 2tyy|2231(;	< #2&r   c           	        | j                   j                  dt        j                  j                  j
                        D ]  }t        | |j                  d   j                        }|j                   j                  d      D ]r  }t        t        |j                  d               D cg c]8  }t        |j                  d   |   t        j                  j                        r|: c}|j                  d<   t t        |        y c c}w )Nr   r   r   r   r   user_visible_output_idxs)r   r   r   r   r   r   r   r   r   r   lenr   r   r   r   *_recursive_record_user_visible_output_idxs)r   r   r   idxs       r~   r   r      s    ##599#9#9#I#I $  = 2tyy|223NN---: 	D !TYYq\!235diil3/? 5DII01	 	38<=5s   (=Dc                 4    t        j                  t              S ry   )dynamo_loggingget_step_loggerr   r   r   r~   _step_loggerr     s    ))#..r   c                    t         j                  j                         rgt         j                  j                  j                  j
                  s8t         j                  j                         dk\  rt        j                  d       y y y y )N)   r   zTensorFloat32 tensor cores for float32 matrix multiplication available but not enabled. Consider setting `torch.set_float32_matmul_precision('high')` for better performance.)	r   cudais_availablebackendsmatmul
allow_tf32get_device_capabilitywarningswarnr   r   r~   _warn_tf32_disabledr     sc     	

!##**55JJ,,.&8d	
 9 6 	"r   c           
        t        | j                  d      D cg c]  \  }}|	 c}}      j                  t        | j                  d      D cg c]  \  }}|	 c}}             dfd}|j                  j
                  D ]  }|j                  dk(  s|j                  }|j                  d      s|j                  d      sBt        | |      sO t        |      |      } t        |      |       }t        j                  ||      r|j                  |j                  k(  r|j                  d      rdnd}	 ||j                  |	      }
|	 |
 }||_        t        |||       j                  |        yc c}}w c c}}w )	a  
    In aot_export_module (make_fx), we create get_attr nodes with name prefix
    "_tensor_constant" and "_torchbind_obj". See Tracer.create_arg() in
    torch/fx/_symbolic_trace.py

    However, this might result in name collision if the original mod already
    has a different buffer with the same name.

    We resolve this potential name collision here by changing the target name
    with a new number post fix.
    Fremove_duplicatec                .   d}| j                   D ]  }|j                  dk(  s|j                  j                  |      s/t	        |j                        t	        |      kD  sQ|j                  j                  |      d   }|j                         st        |t        |            } D ]f  }|j                  |      st	        |      t	        |      kD  s-|j                  |      d   }|j                         sRt        |t        |            }h |dz   S )Nr   get_attrrV   )	nodesr   r   r   r   splitisdigitmaxint)r   prefixir   post_fixkeyexisting_keyss         r~   find_smallest_iz0_resolve_name_collision.<locals>.find_smallest_i4  s    KK 	2Dww*$)?)?)Gt{{#c&k1#{{008<H'')3x=1	2 ! 	2C~~f%s8c&k)"yy04H'')3x=1	2 1ur   r   _tensor_constant_torchbind_objN)r   zfx.Graphr   strreturnr   )rN   named_parametersupdatenamed_buffersr   r   r   r   r   hasattrr
   r   equaldtypesetattradd)modr   r   r   r   r   target_name	gm_targetmodel_targetr   new_idnew_target_namer   s               @r~   _resolve_name_collisionr     sw    "33U3KL)$LM #*;*;U*;*STYT3DTU   /77j ++K))"!,,-=>3,/
;/3I2:k237LI|4OO|'9'99 ))*<= #% 
 %RXXv6F!'1O)DKB3o.7/- 	M Us   F 
Fc           	        ddl m}m} t        | |       i }| j	                  d      D ]   \  }}|||<    |||||j
                         " | j                  d      D ]   \  }}|||<    |||||j                         " |j                  j                  d      }	g }
|	D ]  }|j                  }||j                  v r!|j                  |   }|
j                  |       >||j                  v rE|j                  |   }|
j                  |       t        ||         |j                  t!        |      <   ||j"                  v sJ |
j                  d         ddlm} t)        |j                  j*                        d	   j,                  d   }g }|j.                  }|j0                  }|j2                  }t5        |      D ]y  \  }}d }|t7        |      t7        |      z   t7        |      z   k  r;|j                  |v r||j                     }n|j                  |v r||j                     }|j                  |       {  |||
|t9        j:                         d |i       }|S )
Nr   )_assign_attr	_AttrKindFr   )	attr_kindplaceholderr   )_unliftr   )torch.export.unflattenr  r  r  r  	PARAMETERr  BUFFERr   r   r   inputs_to_parametersr   inputs_to_buffersrg   r   ri   user_inputstorch.export._unliftr  r   r   r   buffers_to_mutateuser_inputs_to_mutateoutput_tokens	enumerater   pytreeLeafSpec)r  r   graph_signaturer  r  
state_dictr   parambufferplaceholder_nodeslifted_inputsr   	node_nameparameter_namebuffer_namer  outputsmutated_outputsbuffer_mutationsuser_input_mutationsr#  r   outr   unlifted_gms                            r~   _unlift_graphr6  b  s{    ?C$OQJ++U+C 
e 
4))		

 ))5)A 
f!
4&&		

 ++}+=)+M " 'II	<<<,AA)LN  0/;;;);;IFK  -&z+'>? GG4[AB  ; ;;;;  &' -288>>"2&++A.GO&88*@@#11Mg& 	&S6:%&-A)BBSEWWWxx++(211,SXX6u%	& 

K r   Fc              #    K   t        d | j                  j                  d      D              }t               }| j                         D ]@  \  }}||v st	        |t
        j                  j                        s0|j                  |       B |rl| j                  j                  dt
        j                  j                  j                        D ]*  }|j                  |j                  d   j                         , |E d {    y 7 w)Nc              3  4   K   | ]  }|j                     y wry   )r   r   s     r~   r   z&_get_subgraph_names.<locals>.<genexpr>  s      55s   r   r   r   r   r   )rN   r   r   named_childrenr   r   r   rI   r  r   r   r   discardr   r   )r   skip_invoke_subgraphall_subgraph_namesfx_subgraph_names
child_namechild_moduler   s          r~   _get_subgraph_namesr@    s      +5 5((---<5 + *4$&$5$5$7 . 
L ++
%((..1
 !!*-. HH''uyy'='='M'M ( 
 	;D %%diil&9&9:	;
 !  s   AD$D9BD?D Dc                   t        ddd      5  t        j                  }t        j                  }t	        |       D ]'  }t        | |      }t        |d      }t        | ||       ) t        | |||      cd d d        S # 1 sw Y   y xY w)N_recursive_pre_grad_passesTpre_grad_pass_time_uslog_pt2_compile_eventdynamo_compile_column_usr   )	r%   r   add_pre_grad_passesremove_pre_grad_passesr@  r   rB  r  r`   )r   example_inputs
add_passesremove_passessubgraph_namer   new_subgraphs          r~   rB  rB    s     
$"!8
 N
 //
5504 	5Mr=1H5hCLB|4		5
 r>:}MN N Ns   A#A<<Bc                    t        ddd      5  t        | |      D ]  }t        | |      }t        ||        t	        |        d d d        y # 1 sw Y   y xY w)N_recursive_joint_graph_passesTjoint_graph_pass_time_usrD  )r%   r@  r   rO  r]   )r   r;  rL  r   s       r~   rO  rO    sf     
'"!;
  15IJ 	JMr=1H)(4HI	J 	2     5AAc                    t        ddd      5  t        |       D ]  }t        | |      }t        ||        t	        | |       d d d        y # 1 sw Y   y xY w)N_recursive_post_grad_passesTpost_grad_pass_time_usrD  )r%   r@  r   rS  r^   )r   is_inferencerL  r   s       r~   rS  rS    sb    	%"!9
 +
 14 	@Mr=1H',?	@ 	\*+ + +rQ  c                f   ddl m}m}m}m}m}  || |||      }	| |	       nd}
t        t        |	j                  j                        d   j                  d         D ci c]  \  }}|j                  | }}}g }g }i }| j                  j                  D ]V  }|j                  |v r|j                  |       #|j                  |   |k(  s6|j                  dk7  sF|j                  |       X |D ]B  }d|j                  z   } || |||
||j                        nd|       ||j                     ||<   D |ddd   D ]X  }|j                  r/|j                  D ]  }|j                  |   |k(  rJ d| d        >| j                  j!                  |       Z | j#                          |	|fS c c}}w )	a  
    This function takes an GraphModule input "gm".
    The gm will be split into 2 components,
      1) const_gm, which consists the subgraph of gm that can be constant folded.
      2) gm (being inplace modified,) which returns the graph after constant folding.

    If an additional "lifted_constants" argument is passed in, we will assume the gm has
    been lifted and run the transformation accordingly.

    When a "skip_folding_node_fn" callback is passed, we will skip constant folding on
    the nodes for which the callback returns True.

    const_output_index is a mapping of corresponding node name from gm to the
    output index of const_gm.
    Returns (const_gm, const_output_index)
    r   )CONST_MODULE_TAGMETA_TAG
MODULE_TAGreplace_node_with_constantrun_and_get_constant_graphNr   r  _FOLDED_CONST_znode: z user not empty.) torch._inductor.constant_foldingrW  rX  rY  rZ  r[  r$  tupler   r   r   r   r   r   r   users
erase_node	recompile)r   skip_constructorlifted_constant_namesskip_folding_node_fnrW  rX  rY  rZ  r[  const_gmconst_resultr   r   const_outputsto_erase_nodeto_replace_nodeconst_output_indexr   new_const_namens                       r~   split_const_gmrm    s   ,  *
35IH "7!>8:DL #,E(..2F2F,G,K,P,PQR,S"TQM  MO '99%""4(YYx $44M9Q  &	'   F)DII5" )0 ]49956		
 .;499-E>*F dd# &::ZZ Wvvh':5VvEU7VV5W HH%& LLN'''Es    F-c                Z   t         j                  j                  }t        |j                  j
                  |j                  j
                  |j                  j
                  |j                  j
                  g      }|D ]  }| j                  j                  d|      D ]  }t        |j                  j                  dd       t         j                        s8|j                  d   j                  t         j                   k(  sc|j                  d   j"                  j$                  dk(  s  y  y)Nr   r   r   r   TF)r   r   atenrN   mmdefaultaddmmbmmbaddbmmr   r   r   r   r   r   r
  float32devicetype)r   ro  tf32_opsr   r   s        r~   is_tf32_warning_applicablery  C  s    99>>DGGOOJJHHLL  		
H  HH''?6'J 	D499==5u||DIIe$**emm;IIe$++00F:	 r   c                r   t        d | D              }t        j                  r=t        j                  r-|s+t        j                  d       t        j                  d      S t        j                  j                  r+t        j                  d       t        j                  d      S t        j                         S )z
    For CPU backend, enable comprehensive padding causes some unit tests
    fail due to changing number of generated kernels. Skip for now.
    c              3     K   | ]>  }t        |t        j                        st        |j                  j
                         @ y wry   )r   r   r   rB   rv  rw  )r   ts     r~   r   z6maybe_disable_comprehensive_padding.<locals>.<genexpr>_  s/      "#Au||9Tqxx}}s
   A$Az!Skip comprehensive padding on CPUF)comprehensive_paddingz;Skip comprehensive padding for use_runtime_constant_folding)anyr   disable_padding_cpur}  perf_hint_loginfopatchaot_inductoruse_runtime_constant_folding
contextlibnullcontext)rI  has_gpus     r~   #maybe_disable_comprehensive_paddingr  X  s      '5 G !!f&B&B7>?||%88				9	9I	
 ||%88%%''r   c                ^    | s|rt        j                  d      S t        j                         S )zH
    graph partition does not support cpp_wrapper and aot_mode yet.
    F)graph_partition)r   r  r  r  )cpp_wrapperaot_modes     r~   maybe_disable_graph_partitionr  o  s'     h||E22%%''r   c                   t               5  t        |      }|s;t        j                  j	                  d      } t        | |      j                  |  n\|st        j                         n t        j                  j                  |dd      }|5   t        | |      j                  |  ddd       ddd       |S # 1 sw Y   xY w# 1 sw Y   S xY w)z}
    If we can not detect fake mode from the context of inputs, create one.

    The created fake mode will be returned.
    Tallow_non_fake_inputs)moder  N)r   r$   r   _subclassesFakeTensorModerL   	propagater  r  r   r  objectpropagate_dont_convert_inputs)r   rI  force_allow_non_fake_inputs	fake_modectxs        r~   fake_tensor_propr  {  s     
"	# $^4	))88t8TI8N2I.88.I 3 &&(ZZ&&y2I4P 
  Pr	2PP#     s$   BCB:(C:C	?CCc                    t        j                  |       5  t        j                         cd d d        S # 1 sw Y   y xY wry   )r   r  get_config_copy)config_patchess    r~   get_patched_config_dictr    s1     
n	% (%%'( ( (s   4=c               #     K   t         j                  r#t        t               d      5  d  d d d        y d  y # 1 sw Y   y xY ww)NF)dirdelete)r   force_disable_cachesr?   r<   r   r   r~   with_fresh_cache_if_configr    s>     "" Y[7 		 	 		 	s   &A;AA Ac                  r    e Zd ZU ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   y)_CompileFxKwargszOptional[BoxedBool]
cudagraphsSequence[int]static_input_idxsboolis_backwardzOptional[int]graph_idr  r  rU  zOptional[bool]
layout_optz1Optional[Callable[[list[ExternKernelNode]], Any]]extern_node_serializerzOptional[BoxedDeviceIndex]boxed_forward_device_indexN)r   r   r   __annotations__r   r   r~   r  r    s=    ##$$NMM ::r   r  )totalc                  $    e Zd Z	 	 	 	 	 	 	 	 ddZy)_CompileFxCallablec                     y ry   r   )selfr   rI  r   s       r~   __call__z_CompileFxCallable.__call__  s    
 r   Nr   rI   rI  Sequence[InputType]r   Unpack[_CompileFxKwargs]r  r;   )r   r   r   r  r   r   r~   r  r    s-     , +	
 
r   r  c                v   |j                  dd        |j                  dd       |j                  dd       |j                  dd        |j                  dd       |j                  dd       |j                  d	d        |j                  d
d        |j                  dd        t        j                         5 }|j                  t        j
                  j                  j                                |j                  t        t        j                               |j                  t        j                  dddddd             |j                  t                      |j                  t                      t        j                   d|d           t#        t$        d      | |fi |cd d d        S # 1 sw Y   y xY w)Nr  r  r   r  Fr  r  rU  r  r  r  compile_fx_innerinductor_compileTcompile_inductor#inductor_cumulative_compile_time_us)
phase_namerE  log_waitcounterwaitcounter_name_overriderF  )r  inductor)compiler_name)
setdefaultr  	ExitStackenter_contextr   r   _python_dispatch_disable_current_modesrS   dynamo_configuse_lazy_graph_modulerz   r%   r  rZ   r"   pt2_compiler    _compile_fx_inner)r   rI  r   stacks       r~   r  r    s   
 lD)
)2.
mU+
j$'
mU+
ne,
2D9
lD)
.5 
			 
5EKK88OOQR2=3V3VWX%%"-&* $*<)N		
 	689LN+&&}-	
 P"#4JO
 
'
 
 
s   7C.F//F8zcompilation time (in seconds)r|   c                T  ()*+ t         j                  }t        j                  j                  j
                  j                          t        j                  | j                        dk(  r|s~ddl
m} ddlm} |j                  |        t        j                  j                   j#                         }t%        j&                  dd|i|j(                         t+        | j,                        S |j/                  dd      }t0        j3                  d	|       t5        ||      }t7        t9        t;        t=        | j                  j>                                    j@                  d   tB        tD        f      sJ d
| j                          |jG                  d      &tI        tJ        jL                  jN                        |d<   tJ        jP                  rtS        | |fi | tU        jT                         }	tW               }
tY                t[        d d t]        |       D        D              }t_        ddd      5  tJ        j`                   xr tJ        jb                  xs |
xr | xr |}tJ        jb                  }|
}te        d|       tf        j3                  d||||tJ        j`                         ti        |      D ]L  \  }}t7        |t        jj                        s!tm        |jn                  jp                        sA||v sFd|_9        N d}d}d(d}tu        |       }tU        jv                         }|rty        jz                  | ||||      \  }(|v|\  }}tf        j3                  d|       |r)ty        j|                         }tf        j3                  d       ty        j~                  ||||||jG                  dd      |      \  }(ntf        j3                  d       ((d   dk(  r>|J tf        j3                  d((jG                  dd      nd       t        | ||fi |}nB(d   d k(  r|J |J tf        j3                  d!       t        j                          	 t        | ||fi |}|J tU        jv                         |z
  |_C        |\  }}||_D        ||_E        t        j                         \  }}|j                  |       	 t        j                          |t        |      (d"<   |j                  (d#<   tf        j3                  d$|       ty        j                  |||||       n;(d   d%k(  sJ |J |J |\  }}tf        j3                  d&|       ||_D        ||_E        |J |}((d   nd')t%        j                  d() (xs i |)       t%        j                  d*)|(r(jG                  d+      nd(r(jG                  d,      nd(r(jG                  d      nd-||.       (t        d/)fd0(fd12       |j                  |||       ddd       tf        j3                  d3tU        jT                         |	z
         t         j2                  j                         }|r)|\  *+t        d/d4 *fd52       t        d/d6 +fd72       tf        j                  t        j                        rg }t        d8   j                         D ]  \  }}|j                  d9      } t        |       d:k  r|j                  |d;d<d<d<|g       =t        |       d=k\  rd9j                  | dd>       nd9j                  | dd?       }!|!j                  d@      }"|"rDt        |       d=k\  r6| d>d \  }#}$}%}&d9j                  | dd>       }!|j                  |!|#|$|%|&|g       | d?d \  }$}%}&d9j                  | dd?       }!|j                  |!d;|$|%|&|g        tf        j                  dA       tf        j                  dBj                  dCdDdEdFdGdH             tf        j                  dI       |D ]9  }'tf        j                   dBj                  |'        tf        j                  dI       ; t        j                  j                  j
                  j                           t               t        j                  dJ|d   rdKndL dM|dN           S # t        t        f$ r  t        $ r3}t        |t                     j                  |j                        dd}~ww xY w# t        j                          w xY w# 1 sw Y   xY w)Oz
    Inductor API that compiles a single graph.

    If you change the argument list for this function, make sure you
    also update the call to save_args_for_compile_fx_inner below accordingly.
    r   )CompileEventLogLevel)_LazyGraphModulezbackward no-op
compile_id)metadata	log_levelr  r   z&static input idxs compile_fx_inner: %szGinductor can only compile FX graphs which return a tuple/list, but got r  Nc              3  8   K   | ]  }||j                     y wry   )supports_caching)r   backends     r~   r   z$_compile_fx_inner.<locals>.<genexpr>)  s&      #  	  #s   c              3  d   K   | ](  }t        |j                  t        j                         * y wry   )rX   rw  r   r  r   rv  s     r~   r   z$_compile_fx_inner.<locals>.<genexpr>+  s)      
 +6;;8J8JK
s   .0fx_codegen_and_compileT)rE  r  fx_cachezXFX cache status: use_cache=%s, local=%s, remote=%s, aot_mode=%s, force_disable_caches=%szFX cache key generated: %szUsing remote FX cacher  F)r  	constantszFailed to generate FX cache keycache_statebypasszFX cache bypass reason: %scache_bypass_reasonunknownz*FX cache disabled or key generation failedmissz,FX cache miss, compiling and saving to cachetriton_bundler_metatime_taken_nsz.Saving compiled graph to FX cache with key: %shitzFX cache hit with key: %sdisabledfx_graph_cache_)r  time_nsr  r   
componentszcache not enabled)r  cache_event_timer   r  r  remote_cache_enabledlocal_cache_enabledartifactc                     d  ddS )Nr  jsonr   encodingr   )r  s   r~   <lambda>z#_compile_fx_inner.<locals>.<lambda>  s    -k]; &% r   c                 .    t        j                         S ry   r  dumps)
cache_infos   r~   r  z#_compile_fx_inner.<locals>.<lambda>  s    4::j#9 r   metadata_fn
payload_fnz%FX codegen and compilation took %.3fsc                     dddS )N,inductor_generated_kernel_to_post_grad_nodesr  r  r   r   r   r~   r  z#_compile_fx_inner.<locals>.<lambda>  s    F"! r   c                 .    t        j                         S ry   r  )
debug_infos   r~   r  z#_compile_fx_inner.<locals>.<lambda>  s    tzz*5 r   c                     dddS )N*inductor_provenance_tracking_node_mappingsr  r  r   r   r   r~   r  z#_compile_fx_inner.<locals>.<lambda>  s    D"! r   c                 .    t        j                         S ry   r  )node_mappingss   r~   r  z#_compile_fx_inner.<locals>.<lambda>   s    tzz-8 r   aten_mm_info_   -?   )rs  rt  z$Overview info of inductor aten mms: z3{:<30} | {:<20} | {:<20} | {:<20} | {:<20} | {:<20}NameBMNKCountz----------------------------------------------------------------------------------------------------------------------------------ztorchinductor done compiling 	BACKWARDSFORWARDS graph r  )dro   aot_compilationr   	_inductorasync_compileCompiledTritonKernelscache_clearrz   count_callsr   torch._dynamo.utilsr  torch.fx._lazy_graph_moduler  force_recompiler   CompileContextcurrent_compile_idr"   log_instant_eventPT2_COMPILEr,   forwardr  static_inputs_logdebugget_input_idxs_to_checkr   nextiterreversedr   r   r^  r   r   r=   r   tritonr  	save_argsr5   timerD   rY   allr@   r%   r  fx_graph_cacher)   r   r$  r   rB   rv  rw  _is_inductor_staticr8   r  r/   prepare_keyget_remote_cacheload_with_keyr  re   begin_compile_time_taken_ns_fx_graph_cache_key_fx_graph_cache_debug_linescollectset_triton_bundlerQ   rR   	Exceptionr\   r   with_traceback__traceback__end_compiler  _save_graphinstantr  rG   post_compile1log_inductor_triton_kernel_to_post_grad_node_infoisEnabledForr   INFOr#   itemsr   r   r   r   endswithr  formatr   ),r   rI  graph_kwargsr  r  r  r  r  inputs_to_checkstartfx_graph_remote_cachebackends_support_caching	use_cachelocalremoter   inputmb_compiled_graphkey_inforemote_cacher  
start_timer   debug_lines	cache_keytriton_bundler  ecompiled_graphprovenance_infomm_table_datar   partsr   
is_batchedbatchmrl  krowr  r  r  r  s,                                           @@@@r~   r  r    s)	    &&H 
OO!!77CCE)Q.x 	=@((,]]11DDF
,,"J/*66	
 rzz**'3'>'>?RTV'WDFWX-n>OPOd4 89:??BUDMR 
QRTRZRZQ[\R %-%.v}}/G/G%H\"&	
 	
 IIKE<> " #
)"-
#   
 d
 tM +++ )&&?*?)) )	 	 %%&
I.		f''	
 ".1 	1HAu5%,,/5<<,,-**,0)	1 37
226	 \\^
%1%=%=NL/6&"Xz
 ##+ [		6<#/#@#@#BLII560<0J0J"  , 0 0 F'1-!: 		;<
 M!:h!F$,,,II, "- NN#8)DE !7NO!7C!
 &&0$,,,'''IIDE''),$:%;G%! )44437<<>J3N!0)1&	;8A!5@K!= "))+!'!33MB ))+".478K4L
01*;*J*JJ'IIF	R$$! m,555$000''''/$YII19=4=1<G9 ,,,* *4)?J}%Z 	 	""k]+%2	
 	&&#')3
u%7Az~~l3t  45(!' %	
  ! : 	##NI|LitMl II5tyy{U7JK ggOOQO 	
 6	
 	 9	
 %">288: 	BJCIIcNE5zA~$$c3S#u%EF ,/u:?388E#2J'sPR@TD'9:Jc%jAo!&rsq!Qxxcr
+$$dE1aE%BC  *1axxcr
+$$dCAq%%@A)	B, 	78AHHS#sG	

 	  	 CHHQJQQSVWXHHY	  
OO!!77CCELN'&}5;:
F Gj)*	, s %i0  #A|~6EEOO
 ))+[tM tMsR   /B"dd2d7EdA)b6>Ed6d.c<<ddddd'c                  $    e Zd ZU dZded<   ddZy)_FxCompileStatr   r   codegen_and_compilec                     d| j                    S )Nzcodegen_and_compile: )r\  )r  s    r~   __repr__z_FxCompileStat.__repr__8  s    &t'?'?&@AAr   N)r  r  )r   r   r   r\  r  r^  r   r   r~   r[  r[  4  s      Br   r[  c                  d    e Zd ZU dZ ee      Zded<   e	 	 	 	 	 	 	 	 	 	 dd       Z	e
dd       Zy)		FxCompileza
    An FxCompile represents a mechanism that can turn a GraphModule into an
    OutputCode.
    z%dict[type[FxCompile], _FxCompileStat]_compile_statsc                     y ry   r   )r  r   rI  rA  r@  s        r~   r\  zFxCompile.codegen_and_compileH  s     r   c                8    | j                   j                          y ry   )ra  clear)clss    r~   _reset_statszFxCompile._reset_statsQ  s      "r   N
r   rI   rI  r  rA  r  r@  r  r  r;   r  None)r   r   r   __doc__r   r[  ra  r  r   r\  classmethodrf  r   r   r~   r`  r`  <  sr     =H<WN9W
  , '	
 ' 
  # #r   r`  c                  2    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_InProcessFxCompilec                D  /012 d|v r|d   J |d   }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }	t        j                  }
|j                  d	d      }|j                  d
d      }t        d      j	                         5  t        j                         5  t        j                  x},ddl	}t        j                  d|        |j                  |       t              r
t                t        d   j!                         }t#        j$                  t'        t#        j(                         d              t+               t,        j.                  d|rdnd d|        t1        j2                         }t4        j6                  j8                  j:                  j=                  ||dd       |j?                         2tA        dd 2fd       t        jB                  jE                  |       tG        |      }tI               tK        dd      5  t5        jL                         5  tO        |      }ddd       ddd       tQ               tA        dd fd       t        jR                        5  tU              }|5  tW        |       ddd       t        jB                  jY                  |       tZ        jC                  dt]        d ddd!             j_                  dddd"      /tA        dd# /fd$       t        j`                  jb                  rdt4        jd                  jf                  ji                  jj                        1tA        dd% 1fd&       1t4        jl                  jB                  _7        tq               }|js                         rbt"        jt                  d'k  r!tw        t        d(   jy                               }nt        d(   j{                         }t}        j~                  d|)       t        j                         r 	 t        d*t        t                     i+       ddd       t        jR                  |      5  t        |      5  t        |	|
      5  d}d}d}d}|
rt        j                  j                  rpt        d- .      \  }}t        |g |||	|
|||d/
      }t        j                  |      5  |	sJ d0       |j                          |j                         \  }}ddd       t        ||||	|
|||||r|j                  nd|r|j                  nd||1      }t        j                         }|j                          t        j                  |      5   |j                  |  g }|j                  t               0|j                  D ]  }t        |t              rq|j                         rat        t        |j                                     dk(  r<|j                  t        0fd2|j                         j                  D                     |j                  d        t        |       d}tK        d3d      5  |j                  rGd4d5l`ma}  |j                  sJ d0       |j                         \  }!}"t        jB                  d6|!j                         |"j                  r t        jB                  d7|"j                         d}#|j                  r1|j                  |j                        }#t        jB                  d8|#       tK        d9d      5  | j                  ||!j                  |"j                  |#|j                  g t        j                  |j                  j                  |r|j                  j                  ng z         :      }$ddd       n)|j                         }%|%j                  }$t        |%d;d      }ddd       t        j                  t,        j.                        ry|j                         \  }&}'}(t        xj                  |&z  c_r        t        xj                  |(z  c_s        t        xj                  |'z  c_t        t        j                  d<|&|'|(d=       |r.t        j                  j                  rt        jj                  j                  st5        jl                  j                  j                  | rd})jj                  j                  D ]  }*|*j                  j                  d>d      }+|*j                  d?k(  sCt        |+t4        j                        r)t4        jl                  j                  j                  |+      sr|*j                  j                  d@d      x})s n dA},|)r	|, dB|) dC},n|, dC},|,t        jj                  _x        |rrt        jj                  j                  sXt              }-|-rKdD|-j                    },|-j                  j                  d@d      x})r|, dB|) dC},|,t        jj                  _x        t        j                  r[t        $t        t        f      sJ t        |$      cddd       cddd       cddd       cddd       cddd       cddd       S |rTt        jj                  j                  s:ddElm}.  |.t        jj                  j
                        t        jj                  _x        | j                  t        |          xj                  d4z  c_        t        $||t        jj                  j                  |j                         t        d   |z
  |||||2/|      cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   	VxY w# 1 sw Y   	[xY w# 1 sw Y   	xY w# t        $ r t        j                  d,       Y w xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   )xY w# 1 sw Y   .xY w# 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)FzS
        Generates the OutputCode from the GraphModule and example_inputs.
        r  Nr  r   r  Fr  r  rU  r  z/pytorch.wait_counter.actual_codegen_and_compiler   z3Sleeping for %s since sleep_sec_TESTING_ONLY is setr  i  ztorchinductor compiling r  r  r  )save_dirr  c                     dddS )Nfx_graph_runnablestringr  r   r   r   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    / (% r   c                      S ry   r   )runnable_graph_strs   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    #5 r   r  additional_fake_tensor_propTrE  c                     dddS )Nbefore_post_grad_graphrr  r  r   r   r   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    4 (% r   c                 ,     j                  ddd      S )NFTprint_outputinclude_strideinclude_device)print_readable)r   s   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    2#4#4!&tD $5 $ r   rU  %szAFTER POST GRADr|  r}  colored)r{  r|  r}  fast_sympy_printc                     dddS )Nafter_post_grad_graphrr  r  r   r   r   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s     7$,) r   c                      S ry   r   )inductor_post_grad_graph_strs   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    'C r   c                     dddS )Ninductor_post_to_pre_grad_nodesr  r  r   r   r   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    $E(.- r   c                 .    t        j                         S ry   r  )provenance_tracking_jsons   r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>  s    4::6N+O r   )r  
   graph_break)	overwritenum_graph_breakspt2_configs)extra_loggingzfailed to log pt2_configsc                    | j                   dk(  xrc t        | j                  t              xrG | j                  j	                  d      xs* t        | j
                  j                  dd       t              S )Nr   r  r   )r   r   r   r  r   r   r   rF   )r   s    r~   r  z9_InProcessFxCompile.codegen_and_compile.<locals>.<lambda>)  s_    $''Z:O ;&t{{C8; !KK223CD X)$))--t*DFVW	 r   )rd  )	rI  	shape_envr  r  r  r  rU  r  is_const_graphz"AOT mode only supports C++ wrapper)rI  r  r  r  r  r  rU  r  rj  const_wrapper_codeconst_kernel_codeconst_modulerA  c              3  @   K   | ]  }j                  |        y wry   )doprint)r   sps     r~   r   z:_InProcessFxCompile.codegen_and_compile.<locals>.<genexpr>o  s     )X1!))A,)Xs   zGraphLowering.compile_to_fnrV   )AotCodeCompilerzOutput wrapper code: 
%szOutput kernel code:
%sz#Serialized Extern Kernel Nodes: 
%szAotCodeCompiler.compile)device_typeadditional_filesrunnerzGraph Metrics:
%s)num_bytes_accessednodes_num_elemnode_runtimesr   r  stack_tracezWgraph with symbolic shapes inputs and config.triton.cudagraph_skip_dynamic_graphs=True.z Found from 
z,disabling cudagraphs due to incompatible op ) check_lowering_disable_cudagraph)r   ro   r  rM   guardrz   preserve_rng_stater   sleep_sec_TESTING_ONLYr&  r   warningsleepry  r   r#   copysyssetrecursionlimitr   getrecursionlimitr   r   r<  ioStringIOr   _dynamorepro	after_aotsave_graph_reprogetvaluerG   r  fx_graphrn   r_   r%   no_gradr  r   set_fake_modeget_cuda_device_contextrS  fx_graph_transformedpost_grad_graphs_logr(   r~  traceenabledr   	tracebackget_graph_provenance_jsonr   r   _inductor_post_to_pre_grad_nodesr'   in_progressversion_infosumvaluesr  r"   compilation_metric	is_fbcoder   r  r  r3  r  r  r  r  rm  ra   set_graph_handlerruncodegen_with_cpp_wrapperr   rW   CachedMetricsHelperfreeze_runtime_assertsgraph_outputsrK   r   rc   has_tensor_outputr   rJ   
get_strider   r^  
get_layoutr   _check_triton_bf16_supportr  	codecacher  r  r0   extern_kernel_nodesr  compiler  dictfromkeyswrapper_coder  compile_to_modulecallr   inductor_metrics_logr;  count_bytesr  r  r  r  r$  cudagraph_skip_dynamic_graphsdisable_cudagraphs_reasonr   any_is_symbolicr   r   r   r   rj   r   r   r6   torch._inductor.cudagraph_utilsr  device_node_mappingra  rw  r\  r7   
get_deltas)3r  r   rI  rA  r@  r  r  r  r  r  r  rU  r  	sleep_secr&  inductor_countersfdr  r  cuda_contextmetrics_contextr  rj  const_graphr  r  re  r   metrics_helperr   r4  compiled_fn_runnerr  r  kernel_codeserialized_extern_kernel_nodescompiled_fncompiled_module	num_bytesr  r  r  r   meta_valdisablemaybe_incompat_noder  r  r  r  rt  s3    `                                             @@@@r~   r\  z'_InProcessFxCompile.codegen_and_compileW  s     |+\0J0VVV ,\ :
+7+;+;<OQS+T(,,]EB"."2"2:t"D(,,]EB**)--neD5t< 	
 JKQQSV	++-V	 $:::	GI9 

9%)"-#% ( 4 9 9 ; !!#c&;&;&=t"DELN*"-;:> ?!
$ BMM))::B
T ;  "$ 6 GGR0
 .n=I$ B-T E ]]_ E 0^ DIEE 6b9
 	 + HA6r:! O/NO,,R@$***)'+'+ $	 02/@/@!&#'#'%)	 0A 0, !!  D <<''**DDRXXN - %"% $P 1 OO))J #6"7"..0'''1+.x/F/M/M/O+P(+3M+B+H+H+J(&99"&9I ##%	A, -s3J3L/M+AHAV 	*o3NCo .k8Do
 &*""%)"$(! 3 3 P P 4B.40H0 #0 ')"+!)$/!)/E%1$/'+#K ,,[9 *P,PP{#)'@@B >*,= & $2'% +%+A!- +'94F*00D 4E)//$!,$3+. ")!<!<!> ,,.((/ iEII~.QSN**6 +,#(#6#6 <C *3 7$'$9$9$;$'(=cnn>N(O$PTU$U !/ 5 5$))X@P@W@W)X$X!" !/ 5 5d ;< /u5 *.&%5T 8 !>>B#(#4#4  D#4 9>8V8V8X5L++11 ;\=O=O  +00 / 5 5$={?P?P!" >B:$88$)$@$@(-(A(A%& !?
 !0 5 5$J$B!"
 ". 9QU" " />.E.E$)$0$6$6$/$5$5$B050A0A	6&)-,1,>,>,O,O 4? 1<0H0H0Y0Y57	-.**	6& /F /"	" ", /4.E.E.GO*9*>*>K18 /42.m8t ,88FCHCTCTCV@	>=22i?2-->-...@.,1106?2@1> #"MMGG ! A A!OO11AA>R&*$&HHNN 
&D'+yy}}UD'AH $= 8'1(ELL'I','<'<'L'LX'V (.2iimmM4.PP{P %
& #|&)0	k]"&MG)0	nG<C9!!''*K*K.STV.W+.(TUhUoUoTp&qG.A.F.F.J.J -t/  {   .5I\+b*Q@GAGG=(()+T{CCC+K8Si iMo o o oOV	 V	 V	r "!''*K*K
 = ! ; ; 9 ''T
3GG1LG*#&99&113 ,/@@"&)$'*4*si iMo o o oOV	 V	 V	VE EE E8O OD % A $?@AKHA HAZ z" "=8 89i i iMo o o o o o o o oOV	 V	 V	 V	 V	 V	s  +r E6r6n9n,	n9 :ro8(o	6Fo88or4q, q	A q--pA6qC%p,5CpA:p2p3F*p,Cp,0	q9	q		q,	r	r'Cp,5	q>	q		q,	r	r,n61n99o>roo8o5	1o84o5	5o88p=rp
qppp)$p,,p51q9	q	qq		q,q q,#	r,q51r8	rr
	rrNrg  )r   r   r   r   r\  r   r   r~   rm  rm  V  sK    qq ,q '	q
 'q 
q qr   rm  c                Z   t         t        j                  k(  rt               }nIt         t        j                  k(  rddlm}  |       }n$t         t        j                  k(  rddlm	}  |       }t        r'ddlm} ddlm} t        |      sJ d        ||      }j                  | |||      S )NrV   )_DebugSerdeFxCompile)_SubprocessFxCompile)_AsyncFxCompile)_OutOfProcessFxCompilez7async is only valid with an out-of-process compile mode)fx_compile_moder   r   rm  r   compile_fx_extr  r   compile_fx_subprocr  fx_compile_asynccompile_fx_asyncr  r  r   r\  )	r   rI  rA  r@  schemer  r  r  r  s	            r~   r  r    s     -...$&	M33	38%'	M44	4<%'5:&"89 	
E	
9 !(%%b./<XXr   c                d   g }t        |       D ]  \  }}t        |t        j                        s!t	        |j
                  j                        sAt               5  ||v rt        |      r
	 ddd       et        |      s
	 ddd       z	 ddd       |j                  |        |S # 1 sw Y   xY w)z
    This function runs at compile time, and generates a list of indices for which we
    might need to do a copy to preserve alignment requirements.
    N)r$  r   r   r   rB   rv  rw  rk   rE   rC   r   )inputsr  ids_to_checkr   rH  s        r~   r   r   -  s     Lf% 5%.ell''(02 	 %%*;E*B		 	
 /u5	 	
 6	 	A), 	 	s   B&3B&&B/	r   )r  placeholdersmutated_input_idxsc                    ddl m}	 t        j                  j                  rEt        j                  |	|||||||t        j                  j                  j                         	      nt        d d fd}
|
S )Nr   )cudagraphify_impl)device_indexstack_tracesr  rU  r  r  r   r  c                ~    't        j                         5   |       d d d         |       S # 1 sw Y   xY wry   )rz   r  )
new_inputsr  cudagraphify_fnmodelr  s    r~   r  zcudagraphify.<locals>.runr  sH    002 T-eZARST:&&T Ts   3<)r  r  r  r   )torch._inductor.cudagraph_treesr  r   r$  cudagraph_trees	functoolspartialr   r   r  r  )r  r  r  r  r  rU  r  r  r   new_cudagraphify_implr  r  r  s   ``         @@r~   cudagraphifyr  P  sr    
 }}$$#++!%%#%%1}}33FFH

 ,K' ' Jr   c                    t        j                  | j                         | j                         | j                  | j
                        S )z1
    Copy and input while preserving strides
    )r
  rv  )r   empty_stridedsizer   r
  rv  )r   s    r~   static_inputr  |  s/     qvvx177188TTr   c                V    t        | |      } t        ||      }| j                  |       y)z=Index into expanded dimensions of both dst and src then copy_N)r:   copy_)dstsrcexpanded_dimss      r~   index_expanded_dims_and_copy_r    s'     c=
1C
c=
1CIIcNr   c                *  	
 t        |      }t        t        |            t        ||       t	        |t
              sJ t        |      D cg c]  \  }}|vrt        |      ng  c}}t        |      D cg c]@  \  }}t	        |t        j                        s|n|vrt        |      n|j                         B c}}t        t        |            D ]8  \  }\  }}t	        |t        j                        s$|vs)t        |   ||       : t        j                  j                          t        j                  j!                         }|j#                  t        j                  j%                                t        j                  j'                  |      5   | t                     ddd       |j                          t        j                  j%                         j#                  |       t        j                  j                          t        j                  j)                         
t        j                  j+                  
|d      5   | t                    ddd       t	        t
        t,        f      sft.        j0                  rd
fd}n1t3        t5                    D cg c]	  }|vs| c}	d	
fd}t7        ||t                     S c c}}w c c}}w # 1 sw Y   ExY w# 1 sw Y   xY wc c}w )zQ
    Assumes inputs[static_input_idxs[i]] are always the same memory address
    Nthread_local)streamcapture_error_modec                   t              t        |       k(  sJ t        t        |             D ]u  \  }\  }}}t        |t        j
                        s%t        |t        j
                        sJ |v r$|j                         |j                         k(  rgJ t        |||       w | j                          j                          	S ry   )
r   r$  zipr   r   r   data_ptrr  rd  replay)
r  r   r  r  r  r   inps_expanded_dimsr  static_inputsstatic_outputss
        r~   r  zcudagraphify_impl.<locals>.run  s    }%Z8882;M:/AB3 K..c3 "#u||4!#u||444++<<>S\\^;;;
 2#sMJK LLN!!r   c                    D ]8  }|   }| |   }t        |t        j                        sJ t        |   ||       : | j	                          j                          S ry   )r   r   r   r  rd  r   )	r  r   r  r  copy_indicesr   r!  r"  r#  s	       r~   r  zcudagraphify_impl.<locals>.run  si    # V 23 7 o!#u||444-mC.@#}U	V
 LLN!!r   )r  list[InputType]r   Callable[[list[InputType]], Any])r   rN   rm   rh   r   r   r$  r9   r   r   r  detachr  r  r   synchronizeStreamwait_streamcurrent_streamr  	CUDAGraphr   r^  r   size_assertsr   r   rf   )r  r  r  check_input_idxsr   r   r  r  r  r%  r   r!  r"  r#  s     `      @@@@@r~   r  r    s    /v7HI)3#F,=>* 6#34fd###  'C !$+< <!"D  '	 C a.  ++ a		M $-S9K-L#M Paa&36G+G)-*<aOP
 
JJZZ F
uzz0023			6	" #d=!"#
	JJ++F3	JJ JJ  "E			%>		R 4tM234ntUm4(*	" 	"* !]!34
CT8TC
	" 	" (-=z|LLY	*# #4 46
s1   K+AK1"K7L;	LL7LLc                   t        | t              sJ |        t        |        |ddini |ddi}|j                  dt        j
                  j                        }|r|j                  d      r"J d       i |dt        | j                        i}|j                  dd       }| j                  j                  dd       }t        j                  j                  |      }t        j                   d      5  t        j                  j#                  |      5  t%        ddd	      5  t'               5  t)        | |t+        j,                  ||
      |      }t        |t.              sJ |j0                  cd d d        cd d d        cd d d        cd d d        S # 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr  Tzaot_inductor.output_pathz.pt2a
  The output path for aot_compile should not have an extension with .pt2 this is for specifying the output path for the .so in AOTInductor. If you would like to package the AOTInductor generated files into a pt2, please call `torch._inductor.aoti_compile_and_package`.r  dynamo_compile_idcompile_fx_aot)rE  reset_event_log_on_exit)r  )inner_compiler  )r   rI   r*   r   r   r  output_pathr>  r.   coder   r   r   r   r  ro   set_aot_compilationcompile_contextr!   r'   
compile_fxr  r  r6   filename)	model_example_inputs_r4  r  r5  r  saved_compile_idsaved_compile_contextcompiled_artifactss	            r~   r2  r2    s    fk*2F2* &f- ! 
44t4  !$$"F$7$7$C$CK ''/ 	
R	
/

&	&++(>

 ,//0H$O{{':DA!MM889IJ	d#+%%&;<+ 	"&$(	
+ 	+ (#++'= *
 ,l;;;!**-+ + + + + + + + + + + + + + +sa   2 GF=!F(,AF	.	F(7	F= 	GFF(	F=(F1-F=4	G=G	GGc                v   ddl m}m}	 t        |        t	        j
                  | d      }
|
rt        | |d        ||         |	|| |      \  }D cg c]  }||   	 }}t        |      }|j                  j                  ^ }}|j                  d   }t        |      D cg c],  \  }}t        |t        j                  j                        s+|. c}}|j                   d<   g }t        j"                  j$                  j'                         }dgd|,|j(                  J |j(                  }t+        dt-        |      dz
        t/        t0                  }|j2                  }|J d}t-        |      dkD  rg t5        t-        |            D ]I  }|vrd ||<   |dkD  r(||   ||dz
     k(  r|dz  }n|j7                  ||          j9                  |       K |j:                  J t5        t-        |j:                              D ]  }||vsd |j:                  |<    |j<                  r|j<                  j>                  }t@        jB                  jE                  |dd      5   ||||||d||
      d d d        tF        jH                  rS d
fd	}d|_%        |S c c}w c c}}w # 1 sw Y   8xY w)Nr   )%convert_conv_weights_to_channels_lastfreezeTr  r   rV   r  )r  r  r  rU  r  r  c           
         D cg c]  }| |t        |         z
      }}| j                           |      S c c}w ry   )minrd  )r   r   args_newmax_offset_idxoptimized_functionpreserved_arg_indicesunwrapped_args_offsetss      r~   wrapperz%fw_compiler_freezing.<locals>.wrapper  sT     +
 +C>,BCCD
 
 	

!(++
s   <)r   zlist[object]r  zSequence[torch.Tensor])&torch._inductor.freezingrA  rB  rO  ra   decide_layout_optr  r$   r   r   r   r$  r   r   r   r   r   r   r   r   params_flat_unwrap_subclassesr   r   rN   r   params_unwrapped_to_flat_indexr   r  r   params_flatr   r   r   r  r  ro   r  _boxed_call)aot_autograd_modelaot_example_inputsdynamo_modelnum_example_inputsr4  r  r  forward_devicerA  rB  r  	opt_modelindr  r   model_outputs_nodemodel_outputsr   rl  r  tracing_contextparams_flat_unwrappreserved_indices_params_flatunwrapped_idxscurrent_offsetr   rJ  rF  rG  rH  rI  s                              @@@@r~   fw_compiler_freezingr_  .  s    W ""45001CRVWJ+-?F-.@A'-($I$ >SSc,S1SS !34I '__22Q&++A.M#M2;QjEHHMM6R;67 mm22::<OSN"<<HHH,JJQ$6 7! ;<(23(9%(GG)))!"Q&%'"s-./ 	:A--(,"1%q5^A..Q2GG"a'N-11.2CD")).9	: **666s?6678 	6A5515++A.	6 && / ; ; P P			9&=t	D 

*/!'5!	


 	!!, , GNQ T;L

 

s   J$&,J)J)'J//J8c                     t         j                  j                  rt        t	        d             t         j                  j
                  t         j                  j
                  n	t               ddddS )Nzcpp wrapper enabledFT)ztriton.autotune_at_compile_timeztriton.autotune_cublasLtztriton.cudagraphsztriton.store_cubin)r   r$  r  r3   r2   autotune_at_compile_timerU   r   r   r~   get_cpp_wrapper_configrb    sY    }}+'(=>	
 }}55A MM22$)""
 
r   c                B   t         j                  j                         st        j                         S t        d t        |       D              }t        |      dk(  r1t         j                  j                  t        t        |                  S t        j                         S )zX
    Returns a cuda device context manager if there is a single device in the graph
    c              3  @   K   | ]  }|j                   d k(  s|  yw)r   N)rw  r  s     r~   r   z*get_cuda_device_context.<locals>.<genexpr>  s       8FKK64I8s   rV   )r   r   r   r  r  rN   r@   r   rv  r!  r"  )r   cuda_devicess     r~   r  r    s     ::""$%%''-7 8,R08 .L |! 	

$tL123 ##%r   c                   !"#$ |rHt        j                  |      5  t         | t        j                  |            ||      cddd       S t         j                  rt        j                  ddit	                     5  t        j                  |      5  |}t         t              r	 j                  j                  D cg c],  }|j                  dk(  r|j                  j                  d      . }}|D 	cg c]   }	t        |	t        j                        r|	nd" }}	t!        d |D              rt#        t%               ||      D ]g  \  }
}}|
t        |t        j                        sJ |j&                  |j&                  k7  s@t)        d|
 d	|j&                   d
|j&                   d       |}ddlm} t/        |      } | |i |      5 \  }}}}}t        ||t1        j2                  d      ||      cddd       cddd       cddd       S t1        j2                  t        ||      }t5               st7         ||      S t         t              r1t         j                  j8                  t:              rt=         ||      S t?        t@        jB                        5  tE               5  t        jF                  jH                  jK                  t         jL                  jN                        5  t         t              rtQ        dd  fd       tR        jU                  dtW        d ddd             tY         j                        t        jZ                  jT                  _.        t_         |       tQ        dd  fd       t!        d |D              r(ta         ||      cddd       cddd       cddd       S t         jb                  rJ te        |      $tg        t         jh                  jj                        !tm        d      "to        tp              #||n	ts               }	 	 	 	 	 	 	 	 d*!"# $fd}t1        j2                  |d      }tu        tv        |      }t         jx                  r5t        jz                         s!t1        j2                  t|         $!#"      }n't1        j2                  |d      }tu        tv        |      }	 	 	 	 	 	 	 	 d+d}t        d      	 	 	 	 	 	 d,!"#fd        }tu        tv        |      }t/        |      xs  t        j                  j                  d!      }t        j                  j                  j                         xs t        j                  j                  |      }t
        j                  r_t        j                  d"      5  t         |d|#      \  }}dd$lHmI}  ||      }|j                  j                  D ]  }|j                  d%k(  sd|j                  vs" t        |j                        |      }t        |t        j                        r!|j                  |d&      |j                  d<   xt        |t        j                        r8t        j                  j                  j                  ||      |j                  d<   t        |t              s||j                  d<    	 ddd       t               }d' j                  v r j                  d'   |j                  d'<   d( j                  v r j                  d(   |j                  d(<   t        j                  j                         }|rt        j                  j                  nt        j                  }t        j                  |      5  t        j                         5   |       5   |||      cddd       cddd       cddd       cddd       cddd       cddd       S t        j                  |      5  t        j                  j                  |      5  t        j                         5  t        j                  d"      5  	  t        |||||d!"|)	       |      cddd       cddd       cddd       cddd       cddd       cddd       cddd       S # 1 sw Y   jxY wc c}w c c}	w # 1 sw Y   nxY w	 ddd       n# 1 sw Y   nxY wddd       # 1 sw Y   xY w# 1 sw Y   4xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       E# 1 sw Y   OxY w# t        $ r} | j                         dd} ~ ww xY w# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       n# 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)-a@  
    Main entry point for compiling given FX graph.  Despite the fact that this
    lives in :mod:`torch._inductor`, this function is responsible for calling
    into AOT Autograd (and we will eventually get a callback to
    ``inner_compile`` to perform actual compilation.  In other words, this
    function orchestrates end-to-end compilation for the inductor backend when
    you use :func:`torch.compile`.

    NB: This function TAKES OWNERSHIP of the input ``model_`` and can potentially
    mutate it!  Make a copy if you need to preserve the original GraphModule.
    )r4  decompositionsignore_shape_envNr  Fr  r   c              3  $   K   | ]  }|d u 
 y wry   r   )r   vs     r~   r   zcompile_fx.<locals>.<genexpr>  s     :q}:s   zBDevice mismatch between fake input and example input at position #z: z vs zx. If the model was exported via torch.export(), make sure torch.export() and torch.aot_compile() run on the same device.r   )_fakify_script_objectsT)r  r  c                     dddS )Nbefore_pre_grad_graphrr  r  r   r   r   r~   r  zcompile_fx.<locals>.<lambda><  s    3 (% r   c                 ^     j                  ddd      dt         j                         z   S NFTrz  z

 # graph id: r~  idr   r;  s   r~   r  zcompile_fx.<locals>.<lambda>@  9    6#8#8!&tD $9 $ &b&6%78$9 r   r  r  zBEFORE PRE GRADr  c                     dddS )Nafter_pre_grad_graphrr  r  r   r   r   r~   r  zcompile_fx.<locals>.<lambda>T  s    2 (% r   c                 ^     j                  ddd      dt         j                         z   S ro  rp  rr  s   r~   r  zcompile_fx.<locals>.<lambda>X  rs  r   c              3  R   K   | ]  }t        |t        t        t        f       ! y wry   )r   r   r^  r  r   s     r~   r   zcompile_fx.<locals>.<genexpr>a  s     Kaz!dE401Ks   %'c           
        t        j                  d      5  |rt        |        t        j                  j
                  j                  t        |            }t        |       }t        j                  rDt        j                  |j                   }t        |      }t        j                  j                  j!                         }|%|j"                  r|s|j"                  j$                  }nd}t'        t(              rXj*                  j,                  ^ }	}
|
j.                  dk(  sJ t        j0                  |
j                        \  }}	t        |      }n|}||k  sJ ||z   }||k  sJ t3        ||      D cg c]+  }t'        ||   t        j4                  j6                        r|- c}|j8                  d<   ng |j8                  d<   t;        |         | |t=        |      |      cd d d        S c c}w # 1 sw Y   y xY w)Nz$compile_fx.<locals>.fw_compiler_baser   r   r   )r  r  r  rU  r  )rz   r%   rO  r   r  r   num_fw_fixed_argumentsr   rl   r   keep_output_strider%  arg_tree_leavesr   r   r   r   r   num_mutated_inp_runtime_indicesr   rI   r   r   r   tree_flattenr   r   r   r   r   r   )r   rI  rU  r   rX  rY  num_model_outputsr   original_output_start_indexr   orig_model_outputs_nodeorig_model_outputsnum_orig_model_outputsorig_output_end_idxr   r  rU  r  r4  r;  rT  s                  r~   fw_compiler_basez$compile_fx.<locals>.fw_compiler_base  s   
 **+QR N1"5--DD&N(; &1_",,$*$:$:<N<S<S$TM(+M(:%#mm::BBDG*w/B/B<#//OO 4 783!&+66<ll6H6H33699XEEE060C0C3881-*A 255G1H.1B.15FFFF  46LL (
 /2CCCC $)79L$K &mC&8%((--H	 K&++,FG KM&++,FG
 ;2>$"&;E&B)%!-/=MN NnKoN Ns   E G870G3'AG83G88Hr  )rS  rT  r4  r  r  rU  c                    t        |       }|5  t        | d       d d d        |j                  dd       }t        j                  dd      5  t        | |fd|d|cd d d        S # 1 sw Y   LxY w# 1 sw Y   y xY w)NT)r;  static_lifetime_input_indicesr   rv  r  )compilerr  )r  rO  r   rz   r%   r   )r   joint_inputsr   r  r  s        r~   partition_fnz compile_fx.<locals>.partition_fn  s    
 326L M .btL	M BH/B) **5T 	 ;  (2O	
 	 	M M	 	s   A'A3'A03A<backward)r  c                   ddl m} t        j                  d      5  |5  t	        |       }t
        j                  rlt        j                  |j                   }t        |      D cg c]+  \  }}t        |t        j                  j                        r|- c}}|j                  d<   ng |j                  d<   t!        |       }t
        j"                  rt        j$                  t'                     nt)        j*                         5   | |t-        t/        |            d
	      cd d d        cd d d        cd d d        S c c}}w # 1 sw Y   nxY w	 d d d        n# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr   )compile_lockzcompile_fx.<locals>.bw_compilerr   T)r  r  r  r  r  )torch._dynamo.convert_framer  rz   r%   rl   r   bw_outputs_user_visibler%  r{  r   r$  r   r   r   r   r   r>   r  r  rb  r  r  r   r   )r   rI  r  rX  rY  r   rl  r   r  rU  r  r4  s           r~   bw_compilerzcompile_fx.<locals>.bw_compiler	  s\    A ))*KL &1_"11$*$:$:<N<S<S$TM '0&>K"C%a7 K&++,FG KM&++,FG&r* )) LL!7!9:#//1
 )&*.uU|*<#-$(!)3A !  K  !     sT   E1AE&0E 
A-E!E$	E-	E1 EEE	E1E%	!E11E:r  )unlift_effect_tokens)trace_jointrg  )_detect_fake_mode_from_gmr   )static_shapes dynamo_flat_name_to_original_fqnr1  )	fw_compilerr  inference_compilerrg  r  keep_inference_input_mutationsr  r  rh  )r   rI   rI  r  rU  r  r  r;   )r   rI   r  zSequence[object]r   r  r  ztuple[GraphModule, GraphModule])r   rI   rI  r  r  r;   )_r   r  r9  r  rb  ro   set_real_inputsr   rI   r   r   r   r   r   r   r   r~  r  r	   rv  
ValueErrortorch._export.non_strict_utilsrk  r$   r  r  graph_returns_tuplemake_graph_return_tuple_codegenrT   handle_dynamo_export_graphrS   r  r  r   r   r  preserve_node_metar  r  rG   pre_grad_graphs_logr  r(   rq  r  _pre_grad_graph_idrB  r&   _raise_error_for_testingr   r=   r$  r  r1   r!  _graph_counterr[   r-   r;   freezingis_grad_enabledr_  rH   r  r  r   r   r   r  functorch_configr+   torch._export.utilsr  r
   r   from_tensorScriptObject_libraryfake_class_registrymaybe_to_fake_objrF   r6  _C_is_any_autocast_enabled_DisableAutocastr  r  r  r   _disabletracingrP   rQ   remove_dynamo_frames)%r;  r<  r4  r  rg  rh  inputs_r   fake_inputsinpr   fir   rk  r  patched_mod	fake_argsr   recursive_compile_fxr  r  r  r  r  rZ  r   r'  r  r   r5  disable_ampr   rP  r  rU  r  rT  s%   ` `                              @@@@r~   r9  r9    s	   . \\.) 	:fll>:=I-!1	 	 LL!5,.4	 o.4	 ,;G&+. !' 2 2ww-/ IIMM%(   + &c5<<8CdB 
 :k::&)%';&H "
R>#-a#>>#>!yyAHH4&0&hilhmmo')yykahhZ @o%o'" !"	" *GM(1I'YG  L!"+"3"3Mt"T#1%5 O4	 4	 4	l %,,#%)	 v&& 
 	
 &+&:~, * 
 	
 	}BBCS9 "S9 	--fll.B.BCS9 fk* 9
  %%&%#'#' 	 8:&,,7GEOO!!4/HF9
 K?KK'$kS9 S9 S9 S9v 2222 1 v}}778
 *$/ ' -8N>Q>S 	S	S	/S	 S	 	S	 S	l .UC 	 6j+N??5#8#8#:5>5F5F$##5+%!-6 "+!2!23CRV!W!@."		*	 	 -		6 
'*	="	"	-@"	"	 
>"	H 6j+N$
 J--D-I 	 MM((002 7}}++I6 	
 !''TB 6&7# %#1	'#O J5b9	 HHNN 6Dww*,dii1G!8DKK!8!<%fell;/8/D/D &d 0E 0DIIe, (0B0BC % B B T T$-v!" !IIe,
 (0@A/5DIIe,6#6B (ODK1V[[@GM{{6H  !CD #fkk18>DW8X  !45  ((;;=K-8))j>T>T  + H->-G-G-I H79 H)+GH H H Hw	S9 S9 S9 S9~	 OOI&	9MM!!/2	9 &&(	9 ""=		99
| + +'9#1!-37)/=%5
 /
+	9 	9 	9 	9 	9}	S9 S9 S9 S9y	 	0(  O4	 4	 4	 4	 4	 4	n
6 6hH H H H H H H H* $ 9 ,,.D89%	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9}	S9 S9 S9 S9 S9 S9 S9 S9 S9s  )b1c4,c1b>6c<%c!2c5c
Ac+c=	c	c4(g&3Ag5B5f<*	g3	g&G%f<+A	d	5d	B8d	=d	Cf<d9	,d#4	d=	d#	d9		f<	g!	g&4f<	 f'	)f>e=e(e1	e=:	f	f'		f<	g	g&1b;>
ccc	c4c'	#c44c>df<dd#	d9	#d,(d9	/
f<9e>f<	e%e  e%%e((e1-e=4	f=ff		f'	ff'		f<'f0,f<3	g<gg	g&g	g&&g/c                   t        | t              syt        |       j                  \  }t        |t        t
        f      ryt        |t        j                  j                  j                        rst        |j                  d      r]t        |j                  j                  j                        dkD  r1t        d |j                  j                  j                  D              ryy)z"True if a FX graph returns a tupleT_schemarV   c              3  L   K   | ]  }t        |j                        d k(    yw)r   N)r  rw  )r   rets     r~   r   z&graph_returns_tuple.<locals>.<genexpr>	  s     OcCHH)Os   "$F)r   rI   rl   r   r   r^  r   r   r   r   r  r   r   r  returnsr'  )r   rvs     r~   r  r  	  s    b+&O  ER"tUm$2uxx}}))*BIIy)		!!))*Q.ORYY5F5F5N5NOO r   c                   t        |       }|j                  \  }t        j                  |      \  }| j                  j                  |      5  | j                  j                  |       ddd       | j                  j                  |       t        |       sJ  || |      t        j                        dfd       }|S # 1 sw Y   [xY w)z
    Mutate gm so it returns a tuple.  This is only needed for graphs
    not created by torchdynamo that return non-tuples.
    Nc                 <    t        j                   | i |      S ry   )r%  tree_unflatten)r   r   r  specs     r~   rJ  z(make_graph_return_tuple.<locals>.wrapper	  s     $$[$%A&%A4HHr   )r   r   r   r   r  r   )rl   r   r%  r}  r   inserting_beforer   r`  r  r  wraps)r   r  
compile_gmr   r  rJ  r  r  s         @@r~   r  r  	  s     r?DIIER""2&HB		"	"4	( 
HHr"""R(K__[!I "I N s   CCc                .   | j                   j                  t        j                  j                   j	                         | j                   _        | j                           ||  j                  |       t        j                        dfd       }|S )z
    `torch._dynamo.export` embeds pytrees in the FX graph codegen object,
    convert that to a normal FX graph so inductor can compile it.
    c                 F    j                    j                  |         S ry   )process_outputsprocess_inputs)r   codegenr  s    r~   rJ  z+handle_dynamo_export_graph.<locals>.wrapper	  s'    &&{4JG4J4JD4Q'RSSr   )r   r   r  r   )	r   r  r   r   CodeGenra  r  r  r  )r   r  r  rJ  r  r  s       @@r~   r  r  	  sx     hhG..0BHHLLNR!7!7!7!@AK__[!T "T Nr   c                   dd}t        j                  | j                  j                         | j                        D ]  }t        |t              st        |      }|r,t        |      r!|j                         t        j                  k7  rNt        |      }|j                  d      r y  ||j                                 y )Nc                    ddl m} | J t        | j                        }|j	                  |       }t        j                  |j                   d        |d      )Nr   )rR   z9 does not support bfloat16 compilation natively, skippingzBF16 is not supported)torch._dynamo.excrR   r   rw  get_device_propertiesr   r   r   )rv  rR   device_interfacedevice_propss       r~   warn_and_skipz1_check_triton_bf16_support.<locals>.warn_and_skip	  s\    /!!!3FKK@'==fE  !!Z[	
 /00r   F)including_emulation)rv  zOptional[torch.device]r  r   )	itertoolschaingraph_inputsr  r  r   rc   rb   rB   	get_dtyper   bfloat16r   is_bf16_supported
get_device)r   r  r   r  r  s        r~   r  r  	  s    
1  2 2 9 9 ;U=P=PQ )$'%d++&~~5>>1 4K@--%-Hdoo'()r   )optionsc               B   ddl m}  ||       sJ d       d}d}t        | j                  j                  t
        j                  j                  j                        r| j                  j                  }t
        j                  j                  j                         | j                  _        | j                          |j                  j                  |j                  j                  }|j                  j                  G|j                  j                  }n0t        | d      r| j                  }t        | d      r| j                  }|t!        j"                  |      nd}|t!        j"                  |      nd}	t!        j$                  ||xs i f      \  }
}t'        d |
D              rd	d
lm}m}  ||j.                  d      |
D cg c]&  }t        |d   t
        j0                        r|d   nd( }}|||k7  rt3        d| d|       |||	dni |||	d}||fS c c}w )z
    Flatten the inputs to the graph module and return the flat inputs and options.
    Add "aot_inductor.serialized_in_spec" and "aot_inductor.serialized_out_spec" to the options.
    rV   )r  zGraph output must be a tuple(). This is so that we can avoid pytree processing of the outputs. Please change the module to have tuple outputs.N_in_spec	_out_spec c              3  V   K   | ]!  }t        |d    t        j                         # yw)rV   N)r   r   r  r   s     r~   r   z'_aoti_flatten_inputs.<locals>.<genexpr>
  s!     
MA:adE../
Ms   ')r   )	UserErrorUserErrorTypezTorchBind objects found in inputs. TorchBind object inputs are not supported in AOTInductor. TorchBind objects can only be attributes.z>Trying to flatten user inputs with exported input tree spec: 
z-
but actually got inputs with tree spec of: 
)zaot_inductor.serialized_in_specz aot_inductor.serialized_out_spec)r9  r  r   r   r  r   r   rT   r  ra  pytree_infoin_specout_specr  r  r  r%  treespec_dumpstree_flatten_with_pathr~  r  r  r  INVALID_INPUTr   r  )r   r   r   r  r  r  r  r  serialized_in_specserialized_out_specflat_args_with_pathreceived_specr  r  r   flat_example_inputss                   r~   _aoti_flatten_inputsr  	  s(    0r" 	" GH"((##UXX^^%B%BC((##!HHNN224
&&2))11G''3**33H 2z"kkG2{#||H;B;N..w7TV+3+?h'R  *0)F)F	v|*& 
M9L
MM>''8
 	
 CV=>
1Q4.!D8  }7Mi <o
 	
 ? 0B0C	



/A0C
  ''1s   +H)r}   r  r  z.Callable[[Callable[_P, _T]], Callable[_P, _T]])r   r  r   r  r  ri  )r  ztuple[FxCompileMode, bool])r   r   r  	list[int])r   rI   r  ri  )r  zCallable[..., None]rh  )r  rI   r   rI   r  ri  )r  rI   r   rI   r'  r   r  rI   )F)r   rI   r;  r  r  zGenerator[str, None, None])r   rI   rI  r  r  rI   )r   rI   r;  r  r  ri  )r   rI   rU  r  r  ri  )TNN)
r   rI   rb  r  rc  zOptional[list[str]]rd  z)Optional[Callable[[torch.fx.Node], bool]]r  z"tuple[GraphModule, dict[str, int]])r   rI   r  r  )rI  r  r  "AbstractContextManager[None, None])r  r  r  r  r  r  )r   rI   rI  r  r  r  r  z torch._subclasses.FakeTensorModery   )r  z$Optional[Union[str, dict[str, Any]]]r  zdict[str, Any])r  zGenerator[None, None, None]r  )r   rI   rI  r  r@  r  r  r;   )
r   rI   rI  r  rA  r  r@  r  r  r;   )r  r  r  r  r  r  )r   )r  Callable[..., Any]r  r  r  r   r  zlist[Optional[str]]r  r  rU  r  r  ztuple[torch.Tensor, ...]r  zSequence[PlaceholderInfo]r   ztuple[int, ...]r  r  )r   torch.Tensorr  r  )r  r  r  r  r  r  r  ri  )r  r  r  zlist[torch.Tensor]r  r  r  r'  )
r;  rI   r<  r&  r4  r  r  zOptional[dict[str, str]]r  z%Union[list[Union[str, Weights]], str])rQ  rI   rR  r  rS  rI   rT  r   r4  r  r  r=   r  r   rU  r1   r  z0Callable[[list[object]], Sequence[torch.Tensor]])r  zdict[str, object])r   torch.fx.GraphModuler  zAbstractContextManager[None])r;  rI   r<  r  r4  zCallable[..., OutputCode]r  Optional[dict[str, Any]]rg  z.Optional[dict[OpOverload, Callable[..., Any]]]rh  r  r  zPUnion[Callable[[list[object]], Sequence[torch.Tensor]], str, list[str], Weights])r   rI   r  r  r  r  r  r  )r   ra   r  ri  )
r   r  r   z!Union[list[Any], tuple[Any, ...]]r   r  r  r  r  z tuple[list[Any], dict[str, Any]])
__future__r   r  enumr  r  r  r  r   r   r  r&  r   abcr   r   collectionsr   r   inspectr   r	   operatorr
   typingr   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittestr   torch._inductor.async_compiler   torch.fxtorch.utils._pytreer   _pytreer%  functorch.compiler   r   torch._dispatch.pythonr   torch._dynamor   r   r  r   rz   torch._dynamo.device_interfacer   torch._dynamo.repro.after_aotr    r  r!   r"   r#   r$   r%   r&   r'   r(   r)   torch._functorchr  7torch._functorch._aot_autograd.subclass_parametrizationr*   torch._functorch.aot_autogradr+   r,   r-   torch._inductor.codecacher.   r/   r0   r  r1   r2   r3   r4   torch._inductor.debugr5   torch._inductor.output_coder6   r7   r8   r9   r:   r;   'torch._inductor.runtime.cache_dir_utilsr<   torch._inductor.utilsr=   r>   r?   r@   rA   rB   rC   rD   rE   "torch._library.fake_class_registryrF   torch._loggingrG   torch._utils_internalrH   rI   %torch.fx.experimental.symbolic_shapesrJ   rK    torch.fx.passes.fake_tensor_proprL   torch.monitorrM   torch.utils._ordered_setrN   _dynamo.backends.commonrP   _dynamo.excrQ   rR   fx._lazy_graph_modulerS   fx.graphrT   utils._tritonrU   r  rW   codegen.commonrX   rY   r  rZ   decompositionr[   excr\   fx_passes.joint_graphr]   fx_passes.post_gradr^   r_   fx_passes.pre_gradr`   r   ra   irrb   rc   output_coderd   triton_bundlerre   rf   rg   rh   ri   rj   rk   rl   rm   rn   virtualizedro   collections.abcrp   rq   rr   
torch._opsrs   )torch.export.pt2_archive._package_weightsrt   ru   rv   rw   r  r   r   torch._inductor.fb.utils&torch._functorch._aot_autograd.schemasr   r   r   Enumr   r   r  r  r   r   r   _logginggetArtifactLoggerr  r  r  r  r  r   r   r   r   	lru_cacher   cacher   r  r6  r@  rB  rO  rS  rm  ry  r  r  r  r  contextmanagerr  r  r  r  r  r[  r`  rm  r  r   r  r  r  r  r2  r  r_  rb  r  r9  r  r  r  r  r  r   r   r~   <module>r2     s   "    	    	 
   # # -     I I U U  $  $ $ A  ;  D =
 
 
 8 
 O N  A  >
 
 
 @ + ?   W ; & / 2 5 : % &  U  .  5 B /   ' I )
 
 
  3:%A$ t_T](((*% L DII +: %=$> !!g!00<Hnn66xARS ~~77BTU NN44'  ~~77BTU 
4A*'=  T/ / 	
 	
?/DK	K%K8FKK^ 38!!+/!!0NN'N N( 38+/	(	+ "15FJ	E(E(E( /E( D	E(
 (E(P*('('(.	(	(!%	('	( ).' "& &	@ <@(8((  
;y 
; *
*
'*
 '*
 	*
Z 23@@'@ -@ 	@ 4@F
B B# #4s) slYY'Y
 #Y -Y YB  $   J (*) +-.0*,))$) 	)
 &) ) ) () ,) () )XU		  
	 (*\M\M\M %\M &	\MD )9/3	;+;+$;+ &;+ -	;+
 +;+| qc#c+c c 	c
 &c c c %c 6cL&* 0@/3EI"G9G9(G9 -G9 -	G9
 CG9 G9 VG9T$ # 	4 # 	,)D (,R(
 )-R(R(
+R( %R(
 &R( &R(r   