
    rh                       U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e j"                  j%                  dd      dk(  ZdZdee   fd	Zdee   fd
Zdee   fdZdee   fdZdee   fdZdefdZdefdZe j"                  j%                  dd      dk(  ZdZdZdZ dZ!e"e#d<    eddd      Z$ee#d<    e       Z%ee   e#d<    e       Z&ee   e#d<    eddd      Z'ee#d<   dZ(ee#d<    e       Z)ee   e#d<    e       Z*ee   e#d<    ed d!d      Z+ee#d"<   dZ,ee#d#<   i Z-e.e/e/f   e#d$<   dZ0ee"   e#d%<   d&Z1ed'   e#d(<   e j"                  j%                  d)d*      dk(  Z2ee#d+<   e j"                  j%                  d,d*      dk(  Z3ee#d-<    e        Z4ee#d.<   e j"                  j%                  d/d      dk(  Z5dZ6dZ7e j"                  j%                  d0d      dk(  Z8e j"                  j%                  d1      dk(  Z9e j"                  j%                  d2d      dk(  Z:e j"                  j%                  d3 e       rd*nd      dk(  Z;dZ<dZ=dZ>e j"                  j%                  d4d*      dk(  Z?e j"                  j%                  d5      dk(  Z@dZAe j"                  j%                  d6d7      ZBed8   e#d9<   dZCdZD e       ZEdZFdZGdZHdZIe	j                  j                  j                  e#d:<   dZMe	j                  j                  j                  e#d;<   dZNeee	j                  j                  gdf      e#d<<   dZQeee	j                  j                  gdf      e#d=<   dZReee	j                  j                  j                  gdf      e#d><   dZTeeeUd?   geUd?   f      e#d@<   dZVeeeUd?   geUd?   f      e#dA<   dZWdZXdZYdZZdZ[i Z\e.e/e.e/ef   f   e#dB<   i Z]e.e/e.e/ef   f   e#dC<   dZ^e j"                  j%                  dDd      dk(  Z_dZ`dZaddEdFddGZbe.e/ef   e#dH<   dIZcedJ   e#dK<   dZdg dLZeeUee/eeUd?   geUd?   f   f      e#dM<   dZfee"   e#dN<   dZgdOZhdPZidQZj eddRdST      Zkee#dU<   e j"                  j%                  dV      dk(  Zle j"                  j%                  dW      dk(  Zme j"                  j%                  dX      dk(  Zne j"                  j%                  dY      dk(  ZodZZpee"   e#d[<   dZq e       rdne j"                  j%                  d\      dk(  Zre j"                  j%                  d]d^      j                         Zte j"                  j%                  d_d`      j                         Zue j"                  j%                  dadb      j                         Zvedc   e#dd<   e j"                  j%                  dedb      j                         Zwedc   e#df<   dZxdgZye j"                  j%                  dh      dk(  Zze j"                  j%                  di      dk(  Z{e j"                  j%                  dj      dk(  Z|dkZ}dlZ~dlZe j"                  j%                  dm      dk(  Ze j"                  j%                  dn      dk(  Ze j"                  j%                  do      dk(  Z e"e j"                  j%                  dpd            Ze j"                  j%                  dqdr      Ze j"                  j%                  dsdt      Zdue/defdvZdue/defdwZdue/defdxZe j"                  j%                  dydb      Ze	j                  j                  sdnd*Ze j"                  j%                  dze      dk(  Ze j"                  j%                  d{d*      dk(  Ze j"                  j%                  d|d      dk(  Ze j"                  j%                  d}      dk(  Zd~ZdZdZdZdZe j"                  j%                  d      dk(  ZdZe j"                  j%                  d      dk(  Zee#d<   e j"                  j%                  d      dk(  Zee#d<   e j"                  j%                  ddr      Ze j"                  j%                  dd*      dk(  Zee#d<   dZZe j"                  j%                  dd      dk(  ZdFZdZdZdZdZdZdZdZdZ e"e j"                  j%                  dd             Ze j"                  j%                  dd*      dk(  ZdZdZdZdZdZdZdFZdFZdZdZdZe j"                  j%                  dd*      dk(  Zde	jj                  v xs de	jj                  v Z e       xs eZe j"                  j%                  dd      dk(  ZdZee/   e#d<   dZee/   e#d<   de/fdZ e       Ze/e#d<    eddd      Zee#d<   dZdQZddgZeUeed   e/f      e#d<   dZee#d<    G d d      ZdefdZde"fdZ e       rdn eī       Zee"   e#d<    e       Zee#d<    eddd      Zee#d<   e j"                  j%                  dd*      dk(  Zee#d<   ee/   e#d<    e       ri	 d dlmZ erL eʐj                  e j                  j                  eːj                  de j                        d            Zn eʐj                  d      ZndZdZZe j"                  j%                  dd      dk(  Ze j"                  j%                  dd      dk(  ZdZdZdZdZdZdZdZee#d<   e j"                  j%                  dd*      dk(  ZdZdZdZdZe j"                  j%                  ddr      Zedrk7  Zedk(  rdrneZe j"                  j%                  dd      Zee/   e#d<   e j"                  j%                  d      dk(  ZdZe j"                  j%                  dd*      dk(  Zee#d<   dZee#d<   dZee#d<   dZee#d<   dZee#d<   dZee#d<   dZee#d<   e j"                  j%                  dd*      dk(  Ze j"                  j%                  dd*      dk(  Zee#d<   dZee#d<    G d dë      Z G dĄ dū      Z G dƄ dǫ      Z G dȄ dɫ      Z G dʄ d˫      ZdZed   e#d<   dZed   e#d<    G dЄ dѫ      Z G d҄ dӫ      Zg dԢZeUe/   e#d<   g d֢ZeUe/   e#d<   g ZeUee	j                  e	j                  e	j                  gdf      e#d<    G dل dګ      Z erd dېl  eej                  e          y# eef$ r dZY \w xY w)    N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                      t        d      S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        i/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBr   c                      t         j                  j                  d      dk(  ryt         j                  j                  d      dk(  ryy )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr      s7    	zz~~01S8	zz~~01S8r   c                      t        d      S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr!      r   r   c                      t        d      S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr$      s    IJJr   c                  :    t        dt               sd      S d       S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r	   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr'   #   s&    9K %) r   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r$t        j
                  j                  d      }|| k  S y)N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r	   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr1   *   s^    #$ /2::=zz~~FG3NN	''<<;
 666 r   c                      d} dt         j                  v r"t         j                  j                  d      dk(  S t               r&d}t        j
                  j                  |      }|| k  S y)Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr+   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer0   s      r   prologue_fusion_enabledr6   9   s\    %&"&"**4zz~~=>#EE	<''<<WE888r   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Ti  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEnon_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez)pytorch/remote_cache:force_disable_caches"TORCHINDUCTOR_FORCE_DISABLE_CACHESforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rJ   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separatecpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerZ   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?r)   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r<   tritonatenrn   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesreorder_prefetch_limitr<   i,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r<   r;   r:   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM!TORCHINDUCTOR_DISABLE_DECOMPOSE_K
   autotune_num_choices_displayed"TORCHINDUCTOR_FORCE_SAME_PRECISION(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacei    #TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHETORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmnamec                 2    t        |       xs t        |       S )N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &A*;D*AAr   c                 l    | t         j                  j                  j                  j	                  d      v S N,)r,   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r     s(    5??))??EEcJJJr   c                 l    | t         j                  j                  j                  j	                  d      v S r   )r,   r   r   autoheuristic_user   r   s    r   r   r   
  s(    5??));;AA#FFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT         .TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  r    dt         j                  v rt         j                  d   } nd} | dv s
J d|         | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr     sR    #rzz1zz">?#   / 
 ~.	/ 
 r   worker_start_methodz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingfuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   *    e Zd ZU dZeed<   dZeed<   y)_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r     s    K/9'9r   r   c                  R    d} d}t         j                  j                  |      }| |k\  S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    r)   z0pytorch/inductor:enable_parallel_compile_version)r,   r-   r.   )ENABLE_PARALLEL_COMPILE_VERSIONr5   r0   s      r   #parallel_compile_enabled_internallyr     s0     '(#@G##88AG*g55r   c                  &   ddl } | j                  t              }dt        j                  v r0t        t        j                  d         }|j                  d|       |S t        j                  dk(  rd}|j                  d       |S t               rt               sd}|j                  d       |S t        t        d	      rt        t        j                  d            nt        j                         }|sJ t        d
|      }|j                  d|       |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32r)   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr	   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   s       r   decide_compile_threadsr      s      

H
%C&"**4bjj)HIJ4oF" ! 
	 56  
@B56  r./ $$Q'( 	
 yb),,o>r   r   use_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesc                   n   e Zd ZU dZej
                  j                  dd      dk(  Zej
                  j                  dd      dk(  ZdZ	e
e   ed<    eej
                  j                  dd	            Zdej
                  j                  d
ej                  dk(  rdnd      fZeed   ef   ed<   ej
                  j                  dd      dk(  Zej
                  j                  dd      dk(  ZdZe
e   ed<   dZe
e   ed<    ed      Ze
e   ed<   dZed   ed<    eej
                  j                  dd            Zej
                  j                  dd      dk(  Zej
                  j                  dd      dk(  Zej
                  j                  dd      Zej
                  j                  dd      dk(  Z dZ! eej
                  j                  d d            Z"ej
                  j                  d!d      Z#ej
                  j                  d"d      Z$d#Z%dZ&ej
                  j                  d$d      dk(  Z'dZ(y)%cpp$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE4096CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr,   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH))r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr  r   r   r   min_chunk_sizer   r   r  tupler   strenable_kernel_profileweight_prepackr  r  r   r  r   r  max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferr   r   r   r  r    s\   G
 	

=sCsJ  jjnn%H#NRUUO!GXc]!(JFSTN 	


u3<<8+CiO&Cwt}c!	"  	

@#F#M 
 ZZ^^$FLPSSN
 37 (3-637!8C=7 "22L!MJM 	 wHI 
 "%


EtL" 	

FLPSS   	

FLPSS   +-**..?+' 	

BCHCO 
 $)  BJJNN+PRUVW **..)PRVW **..)PRVW   !
 	

=sCsJ 
  %r   r  c                      e Zd ZU ej                  j                  d      dk(  ZdZdZdZ	e
eeeeedf   f         ed<   dZdZ e       rdndZdZd	Ze
e   ed
<   dZdZdZdZdZdZdZej                  j                  d e       sdnd      dk(  Zeed<   dZe
e   ed<   dZeed<   dZ dZ!dZ"e
e   ed<   dZ#eed<   dZ$eed<   dZ%dZ&ej                  j                  dd      dk(  Z'ej                  j                  dd      dk(  Z(dZ)e*d   ed<   ej                  j                  dd      dk(  Z+ej                  j                  dd      dk(  Z,dZ- eej                  j                  dd            Z.e*d   ed<   ej                  j                  dd      dk(  Z/dZ0dZ1dZ2eed <   dZ3dZ4e
e5   ed!<   dZ6ej                  j                  d"d      dk(  Z7ej                  j                  d#d      dk(  Z8dZ9y)$ro   TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   2   "cudagraph_dynamic_shape_warn_limit&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   r)         multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1):r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr<  r   r)  r   r   r   slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr	    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr>  force_cudagraph_syncforce_cudagraphs_warmupfast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr@  r   rA  rB  autotune_pointwiseautotune_cublasLtrC  rD  rE   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsrM  divisible_by_16min_split_scan_rblockstore_cubinrQ  use_block_ptrr  r*  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   ro   ro   F  s    :;sBJ O %*! MQXeE#uS#X2F,G&HIP #' ).% 1:u$ +.' 9;&: ! $ #( "   N 	

4cRU	
 	 d   $Ix}# #d"   04htn3
 ).- "OT! (,$'+$ 	

:C@CG  	

?EL  	 wHI  	

<cBcI  	

=sCsJ 
 $)  ),


3S9)L'*% 
 jjnn%DcJcQO   K OS M
 37 (3-6 "
 	

5s;sB ! JJNN#:C@CGM 6;2r   ro   c                   <   e Zd ZU dZdZej                  j                  dd      dk(  Zej                  j                  dd      Z	ej                  j                  dd      Z
ed	   ed
<   ej                  j                  dd      ZdZdZdZeed<   dZeed<   dZeed<   dZeed<   i Zeeef   ed<   ej                  j                  dd      dk(  Zeed<   ej                  j                  dd      dk(  Zeed<    eej                  j                  dd            Zeed<   i Zeeef   ed<   dZeed<   dZeed<   dZ eed<   dZ!eed<    e"        Z#eed <   dZ$eed!<   dZ%eed"<   dZ&e'e   ed#<   i Z(ee)jT                  jV                  e,e   f   ed$<   dZ-e'e,e      ed%<   y)&aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   &AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightspackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELrK  repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interfaceTpackage_constants_in_sopackage_constants_on_diskprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libs).r   r   r   __doc__output_pathr   r   r   debug_compilecompile_wrapper_opt_levelr{  r   r   filtered_kernel_namesserialized_in_specserialized_out_specr}  r   r~  r  r  r  dictr*  r  r  r   r  r  r   r  r  r  r  r	   r  r  r  r  r   r  r,   _ops
OpOverloadlistr  r   r   r   rt  rt    s    KJJNN#?ELM !#

0$! EGJJNN7E$g.@&A 
 JJNN0$   */ $.  %$GT"d"  "Hd38n! 	

H#NRUU ( 
  "zz~~.BCHCOO 2::>>*CQGHKH !GT#s(^  $)D( ,1"D0 %)T( ',t+ $-;. !&%
 $)D( 59"HSM8 EG4

 5 5tCy @AF*.NHT#Y'.r   rt  c            
       *   e Zd ZU dZdZee   ed<   dZee   ed<   dZ	e
d   ed<   dZdZdZdZej                   j#                  d	ej$                  j'                  ej$                  j)                  ej$                  j+                  ej.                        d
                  ZdZee   ed<   g dZee   ed<   ej                   j#                  dd      dk(  ZdZdZee   ed<   dZ eed<   ej                   j#                  dd      dk(  Z!e"ed<   ej                   j#                  d      Z#ee   ed<   ej                   j#                  d      Z$ee   ed<   ej                   j#                  dd      Z%eed<   ej                   j#                  d      Z&ee   ed<   ej                   j#                  dd      dk(  Z'e"ed<   ej                   j#                  d d      dk(  Z(e"ed!<   ej                   j#                  d"d#      Z)eed$<   d%Z*e"ed&<   dZ+e"ed'<   dZ,e"ed(<   y))cudaz9Settings for cuda backend, today this consists of cutlassNarchr0   -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)r)   rK  r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxr)   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelTORCHINDUCTOR_CUTLASS_PRESETScutlass_presets+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSallcutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_write)-r   r   r   r  r  r   r*  r   r0   r  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   r   r   pathabspathjoindirnamer,   __file__cutlass_dirr  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r  r   r  r  r  r  r  r  r  r  r  r  r   r   r   r  r    sJ   C
 D(3-
 "GXc]! EJw@AI O   M **..#
GGLL8:ST	
K 48!8C=7 8D)49C 	

0#6#= $
  #Hhsm" *+!3*
 	

H#NRUU $  13

)1  02zz~~(0x}  (*zz~~3S(  &(ZZ^^4S%TOXc]T 	

DcJcQ "4  	

;SASH $   "zz~~+U  
 %)T( +0!4/ -2#T1r   r  c                   f   e Zd ZU g Zee   ed<   ddgZee   ed<   dZe	d   ed<   dZ
dZd	Zd	ZdZd
Zee   ed<   ej$                  j'                  d      Zej$                  j'                  dd      dk(  Zeed<   d
Zee   ed<   d
Zee   ed<   d
Zee   ed<   dZeed<   d
Zeee      ed<   dZeed<   y
)rocmr  gfx90agfx942ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweeprP  split_k_threshold)r   r   r   r  r  r*  r   r  r  r   is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r  r  r  r   r   r   r  r    s    D$s) %-h#7tCy7 	 wL 
 H J M O #(  $Ix}# ZZ^^23F 	

FLPSS $ 
 .2Xc]1 /3hsm2 48!8C=7 ',t+ )-L(49%,  sr   r  )r  ro   halidecpu_backend)ro   r  cuda_backendc                   J    e Zd ZU dZdZdZed   ed<   dZed   ed<   dZ	dZ
dZy	)
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r   r  assertsdebugscan_kernelsr   r   r   r  r  N  sQ    J J
 	 GRS  	 7QR 
 G E Lr   r  c                      e Zd ZU ej                  j                  dd      dk(  Zej                  j                  dd      dk(  ZdZe	e
   ed<   dZdZdZdZdZdZdZej                  j                  d	d      dk(  Zej                  j                  d
d      dk(  Zej                  j                  dd      Zej                  j                  dd      ZdZdZe	ee
gdf      ed<   ej                  j                  dd      dk(  ZdZeed<   y)traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTS1log_inductor_triton_kernel_to_post_grad_node_info)r   r   r   r   r   r   enabledsave_real_tensorsr  r   r*  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr  r   r   r   r   r  r  j  s1   jjnn2C8C?G 

'FLPSS  $Ix}# I H H   M N K JJNN#=sCsJM (>DK jjnn%CTJO !jjnn-OQUV O 37J3%+./6ZZ^^,BCHCO ?C5tBr   r  )
ztrace.upload_tarra   rb   rc   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierr_   r`   r   rd   _save_config_ignore)r  zcuda.cutlass_dirr   r   r`   r_   r   rd   r  r=   r>   rB   rC   _cache_config_ignore_prefixexternal_matmulc                   d    e Zd ZU dZeed<   dZee   ed<   dZ	dZ
dZee   ed<   dZee   ed<   dZy)test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)r   r   r   r  r   r   r  r   r   runtime_triton_dtype_assertstatic_cpp_dtype_assertr  r*  r  *graphsafe_rng_func_ignores_fallback_randomr   r   r   r  r    sM    27)47$(NHSM("'# 15404416.r   r  )*(  r   r   typingr   r   r   r   r   r   r,   !torch._inductor.custom_graph_passtorch._environmentr	   torch.utils._config_moduler
   r   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r!   r$   r'   r1   r6   enable_auto_functionalized_v2r  disable_progressverbose_progressr8   r   r   r=   r>   r?   rA   rB   rC   rD   rF   rG   rH   r  r*  rI   rL   rN   rP   rQ   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  bfloat16_atomic_adds_enabledr^   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passr_   r   custom_graph_passCustomGraphPassTyper`   ra   fxGraphrb   rc   graphrd   r  re   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrf   rg   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmrm   rq    reorder_for_compute_comm_overlapru   rv   reorder_for_peak_memoryestimate_op_runtimeintra_node_bwinter_node_bwry   max_autotunemax_autotune_pointwisemax_autotune_gemmdisable_decompose_kr   graph_partitionforce_same_precisionuppermax_autotune_gemm_backendsmax_autotune_conv_backendsr   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   r   autoheuristic_log_pathr0   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdfallback_randomimplicit_fallbacks assume_unaligned_fallback_outputaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsmax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r   r   r   r   r   libfb.pyr   __package__get_dir_pathr  r  replacesepr   
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastdisable_padding_cpupadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler   permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr   /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr   r   r   r   r   r   r  enable_linear_binary_foldingr  r  r  ro   rt  r  r  r  r  r  r  r
  r  r  Tensorr  torch.utils._config_typingmodulesr   r   r   r   <module>r     sM   	 
 I I  ( ( V V **..!@#F#M# Cx~ CHTN Cx~ CKx~ K8D> d 
 
 JJNN7=D 
 	     '.  - ?1  )F(G x~ G ./ "8D>  )/MB)   " d ! )F(G x~ G 1V0W x~ W $87 d  05 & 4 57 !4S> 6 )-  ,  (1*  JJNN#>DKT K JJNN=sCsJ D  *3_ d 4 >DK    zz~~:C@CGjjnn89S@ >DK JJNN4Y[ccR
       **..!@#F#M 

<=D  $  HJzz~~HWCD 
    *+     TX 5??<<PP WTX EOO==QQ X EI x%((..)94)? @A HEI 588>>*:D*@!AB I
 JN hx)=)=(>(DEF M 	 	;	<=:;	= 	 	 (	;	<=:;	= 	   $)     ( 68 c4S>12 7 79 $sDcN23 8   zz~~&JCPTWW  #   	+ c3h  FQAB P $)   (	?@A>?A	
	*  )-  ,        &,?<& d  zz~~:;sB (NOSVV  JJNN#DEL  jjnn%HISP  13  2  KDRZZ^^,PQUXX   ZZ^^.0A%'   ZZ^^.%'  DF::>>2ID%' (? @ 
 DF::>>2ID%' (? @ 

 "     

'LMQTT JJNN45<	 jjnn%HISP 
 /3 +03 -14 . 

'LMQTT  JJNN<=D  JJNNJKsR ( $'JJNN<cB$   

'LbQ JJNN#DjQ BC BD BK K KGC GD G *I 
 !& 1 1Ss JJNN68JKsR  JJNN+KSQUXX  ZZ^^$FLPSS  **..!@ASH
         JJNNCDK !
   ZZ^^$@ASHd H(HISP $ P

'LbQ JJNN=sCsJ D  !#  JJNN<cBcI 
 $%    -/ )            BJJNN#@!DE::>>"BCHCO  "&   %           "# &+ #  $    JJNN:C@CG 
  1 11OUe>O>O5O [8$8  JJNNEsKsR #
 &* Xc] )(,  ,
C 
 67 S 7 !'7:!        I U8I+>+C%D E 
 ! D  : :
6T 6! !J *3:P:R# R ">!? $ ? 39IL3 )4  JJNN>DK T 
 3- ;
 $3w33[00bff=zJ   4w33J?
    

<cBcI JJNN8#>#E       (          >DK # 
 $    ! zz~~5r: B&  , 3"  +-**.."D+ (3- 
 JJNNHISP 0    8#>#E$ E %* T )  %  $
 $ t # 8= . < ,1 "D 0 49 *D 8 JJNN?EL  **..)JCPTWW 4 W 38 )4 7r% r%lC; C;Lr/ r/jA2 A2H?  ? F 38W./ 7 -5g() 4 8@C @CF" T#Y * T#Y . UWhellELLI4OPQ V7 7  , ckk(+ ,M $   s   A's 	s$#s$