
    rh?                     L   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ ddlmZ dd	lmZmZ  ej.                  e      Z ej4                  d
       G d d             Z ej4                  d
       G d d             Z ej4                  d
       G d d             Z ej4                  d
       G d d             Z ej4                  d
       G d d             Z ej4                  d
       G d d             Z  G d d      Z!y)    N)Path)Optional)countersdynamo_timedset_feature_use)justknobs_check)FileLock   )triton_cache_dir)_IS_WINDOWSGPU_KERNEL_BIN_EXTST)frozenc                   0    e Zd ZU dZeed<   eed<   eed<   y)TritonBundleEntryz
    When we have compiled a triton kernel, we take note of that kernel by
    its triton generated hash, its device, and where this kernel is located.
    This is the minimum information we can use to later retrieve this kernel
    from file system.
    kernel_hashdevice	directoryN)__name__
__module____qualname____doc__str__annotations__int     q/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/_inductor/triton_bundler.pyr   r      s     KNr   r   c                   L    e Zd ZU dZeed<    ej                  d      Ze	ed<   y)TritonKernelArtifactzq
    Artifact for an individual kernel converted to bytes.
    Bytes could be a cubin, json, ttir, or ttgir.
    filenameF)reprpayloadN)
r   r   r   r   r   r   dataclassesfieldr"   bytesr   r   r   r   r   #   s%    
 M&[&&E2GU2r   r   c                   0    e Zd ZU dZeed<   eed<   ded<   y)StaticallyLaunchedAutotuneraQ  
    Represents a statically compiled CachingAutotuner object that we can
    save directly in the cache. A CachingAutotuner is made up of a list of
    StaticTritonCompileResults, each of which uses the cubin from a TritonKernelArtifact.

    Statically saved here have their cubin files saved by a corresponding TritonBundleEntry.
    	cache_keykernel_nameCachingAutotunerkernelN)r   r   r   r   r   r   r   r   r   r'   r'   .   s     Nr   r'   c                   6    e Zd ZU dZeed<   eed<   ee   ed<   y)TritonKernelArtifactsz:
    Collection of artifacts for a particular kernel.
    r   r   	artifactsN)	r   r   r   r   r   r   r   listr   r   r   r   r-   r-   =   s      K())r   r-   c                   2    e Zd ZU dZee   ed<   ee   ed<   y)TritonBundlerMetadataz+
    Metadata used for instrumentation
    cached_kernel_names statically_launched_kernel_namesN)r   r   r   r   r/   r   r   r   r   r   r1   r1   H   s     c"&*3i/r   r1   c                   2    e Zd ZU dZee   ed<   ee   ed<   y)TritonBundlez7
    Serializable bundle to save into FXGraphCache
    kernel_artifactsstatic_autotunersN)r   r   r   r   r/   r-   r   r'   r   r   r   r5   r5   R   s!     011788r   r5   c                   h   e Zd ZU dZdZeee      ed<   dZ	eee
      ed<   dZeed<   edefd       Zedd	       Zedd
       Zedededdfd       Zededdddfd       Zedeee
   ee   f   fd       Zedeee
      dee   fd       Zedeeee   f   fd       Zededee   fd       Zy)TritonBundlera  
    Lightweight Triton Kernel bundler that notes each time we compile a triton
    kernel. When collect is called, converts all the previously noted kernels and
    their artifacts into a structured bytes blob, and later when write is called
    it writes this structured blob back to file system.

    Intended Life cycle:
    - TritonBundler.begin_compile is called when we start compiling in Inductor
    - TritonBundler.put is called each time a Triton Kernel is compiled
    - TritonBundler.collect is called when a cache entry is being generated
    - TritonBundler.end_compile is called to indicate bundling is completed,
      collect will execute this function as well.
    - TritonBundler.read_and_emit is called when a cache entry is read
    N_entries_static_autotunerss	   [REPLACE]_REPLACE_BYTESreturnc                      ddl m}  | j                  ry| j                  x}|S | j	                         syt        d      S )Nr   configFz9pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2)torch._inductorr@   force_disable_caches!bundle_triton_into_fx_graph_cache	is_fbcoder   )r@   bs     r   
is_enabledzTritonBundler.is_enableds   sF    *&&999AFH!G
 	
r   c                     t         j                         syt        j                  d       | j                  J g | _        g | _        y)z
        Initializes the TritonBundler.
        The current TritonBundler bundle is finalized by TritonBundler.collect.
        Nz%TritonBundler.begin_compile is called)r9   rF   logdebugr:   r;   clss    r   begin_compilezTritonBundler.begin_compile   s?     '')		9:||###!#r   c                 J    t         j                  d       d| _        d| _        y)zt
        Finalizes the TritonBundler. If collect is not yet called, it
        discards the current bundle.
        z#TritonBundler.end_compile is calledN)rH   rI   r:   r;   rJ   s    r   end_compilezTritonBundler.end_compile   s      			78!%r   r   r   c           	      l    | j                   x}&|j                  t        ||t        |                   yy)z
        Lazily observes that we have seen a Triton kernel compilation. Remembers
        it for when collect is later called.
        N)r:   appendr   r   )rK   r   r   entriess       r   putzTritonBundler.put   s6     ||#G0NN!+v7G7OP 1r   keyr+   r*   c           	         ddl m} |j                  sJ | j                  x}|j	                         }t        j                  |      }|j                          d |_        |j                  t        ||j                  j                  dd      |             |\  |j                  _        |j                  _        |j                  _        |j                  _        |_        y y )Nr   r?   r)   unknown_kernel)rA   r@   use_static_cuda_launcherr;   prepare_for_picklecopydeepcopyprepare_for_caching_reload_kernelrP   r'   inductor_metagetfn__globals__used_global_valsr!   	launchers)rK   rS   r+   r@   rQ   
old_values
new_kernels          r   put_static_autotunerz"TritonBundler.put_static_autotuner   s    *....---G:  224Jv.J**,(,J%NN+,,00@PQ 				%		*		 + ;r   c                    | j                   sg g fS t        j                  dt        | j                                | j                   D cg c]  }|j                   }}t
        d   dxx   dz  cc<   | j                   |fS c c}w )Nz1Saving %d statically launchable CachingAutotunersinductor$triton_bundler_save_static_autotunerr
   )r;   rH   infolenr)   r   )rK   istatic_autotuner_namess      r   collect_static_autotunersz'TritonBundler.collect_static_autotuners   s     %%r6MHHCC**+ >A=S=S%Tamm%T"%TZ !GHAMH))+AAA &Us   A?r7   c                 2   |sg S ddl m} ddlm} t        j                  dt        |             g }t        d      5  |D ]  }	 |j                  j                  D ]  }|j                           	  ||j                        |j                  |j                  <   t         d   d	xx   d
z  cc<   |j#                  |j                          	 ddd       |S # t        $ r+}t        j                  d|j                  |       Y d}~d}~ww xY w# 1 sw Y   |S xY w)zv
        Load statically launchable CachingAutotuners into async_compile.CompiledTritonKernels
        cache.
        r   )CompiledTritonKernels)StaticAutotunerFuturez+Loading %d statically launchable autotunersz+TritonBundler.load_cached_static_autotunerszBFailed to reload cubin file statically launchable autotuner %s: %sNrf   $triton_bundler_load_static_autotunerr
   )torch._inductor.async_compilern   torch._inductor.codecachero   rH   rh   ri   r   r+   compile_resultsreload_cubin_pathRuntimeErrorwarningr)   _cacher(   r   rP   )rK   r7   rn   ro   kernel_namesresultcompile_resultes           r   load_autotunerszTritonBundler.load_autotuners   s    !IGC>DU@VWGH 	8+ 8
*0--*G*G ;&88:; BWMMB%,,V-=-=> $%KLPQQL##F$6$67)8	8, ! $ KK\**
 	8, s6   D+C/AD	D	!D?DD		DDc           
         ddl m} t        j                         s*| j	                          t        dd       t        g g       dfS t        dd       t        dd      5  | j                  }|g }g }|D ]#  }g }t        j                  j                  |j                  |j                        }t        j                  j                  |      sZt        j                  |      D ]}  }t        j                  j                  ||      }		 t        j                  j!                  |	      sJ t#        |	d	      5 }
|
j%                         }|	j'                  d
      rut        j(                  |v r0t*        j-                  dt        j(                  |       t/        d      |j1                  t2        j5                  |      t        j(                        }|j7                  t9        ||             ddd       t:        d   dxx   dz  cc<   t        j                  jA                  |      d   }|tC        jD                         v sZ|j7                  tG        |      jH                          |s|j7                  tK        |j                  |jL                  |             & |jN                  r| jQ                         \  }}ng }g }| j	                          t        ||      tS        ||      fcddd       S t        g g       dfcddd       S # 1 sw Y   "xY w# t<        $ r t*        j?                  dd       Y 4w xY w# 1 sw Y   yxY w)a  
        This is the main function called when a cache write happens. This function
        converts all the previously remembered kernels into bundled format so that
        it can be written into a cache entry.
        This function also finalizes the current bundle.
        r   r?   triton_bundlingFNTzTritonBundler.collectrS   log_pt2_compile_eventrb.jsonz'Bundle contains illegal %s, payload: %szBundle contains illegal bytesrf   triton_bundler_save_kernelr
   zfailed to collect triton kernel)exc_info)*rA   r@   r9   rF   rN   r   r5   r   r:   ospathjoinr   r   existslistdirisfileopenreadendswithr<   rH   rv   AssertionErrorreplacer   encoderP   r   r   	ExceptionrI   splitextr   valuesr   stemr-   r   rV   rl   r1   )rK   r@   rQ   ry   rx   entryr.   r   r    filepathfiler"   	extensionr7   static_kernel_namess                  r   collectzTritonBundler.collect   s    	+'')OO-u5B'--)405TR =	.llG"68*,$ ,E<>I77<<9J9JKD77>>$/ $&JJt$4 E#%77<<h#?X#%77>>(#;;#;!%h!5 "*.))+#+#4#4W#='4'C'Cw'N(+,U,9,H,H,3)*
 /=,K/* )* /6oo(+

4(8-:V:V/&G !* 0 0$87$K!"#"( %Z01MNRSSN %'GG$4$4X$>q$A	$(;(B(B(DD )//X0C0CD?E@ !1 % 1 1 % )M,Z 22557 ;%': )+%*,'!#F,=>@U "5A u=	. =	.z  B'-{=	. =	." "*  ) XII&GRVIWXG=	. =	.s]   B)L;-L5B2L	'L8L;=)L;(A>L;0L;LL L8	4L;7L8	8L;;Mbundlec           
      0   ddl m} t        j                         syt	        dd      5  g }| j
                  D ]  }t        |j                        }t        j                  j                  ||j                        }t        j                  j                  |      r8t        t        j                  |            dk7  rt        j!                  d|       t#        |      j%                  dd       t'        t)        j*                               }t        j                  j                  |d	|       }t        j,                  |       |j.                  D ]3  }t        j                  j                  ||j0                        }	t3        |	d
      5 }
|j4                  }|j0                  j7                  d      r3|j9                  t        j:                  t&        j=                  |            }|
j?                  |       ddd       t@        d   dxx   dz  cc<   t        j                  jC                  |j0                        d   }|tE        jF                         v s|jI                  t#        |j0                        jJ                         6 tL        rctO        |dz         5  t        j                  j                  |      rtQ        jR                  |       t        j8                  ||       ddd       	 t        j8                  ||        |jX                  r t        j[                  | j\                        }ng }t_        ||      cddd       S # 1 sw Y   YxY w# 1 sw Y   +xY w# tT        $ r t        jW                  d|       Y Pw xY w# 1 sw Y   yxY w)a  
        This is the main function called when a cache read happens. This function
        converts the bundled format back into individual files and writes them
        to the filesystem.

        NOTE: When we are writing to the filesystem, we assume exclusive access
        to the target directory.
        This means that if the target folder already exists and is non-empty,
        we bail out.
        Exclusive access means that no other process should be writing to
        or reading from the target directory.
        r   r?   NzTritonBundler.read_and_emitTr   z8Bailing out TritonBundler.read_and_emit, %s is non empty)parentsexist_okztmp.wbr   rf   #triton_bundler_read_and_emit_kernelr
   z.lockz%Directory %s is not empty - skipping!)0rA   r@   r9   rF   r   r6   r   r   r   r   r   r   r   ri   r   rH   rI   r   mkdirr   uuiduuid4makedirsr.   r    r   r"   r   r   r<   r   writer   r   r   r   rP   r   r   r	   shutilrmtreeOSErrorrv   rV   r|   r7   r1   )r   r@   rx   r.   basedirr   rnd_idtmp_dirartifactr   r   r"   r   r   s                 r   read_and_emitzTritonBundler.read_and_emitK  s    	+'')-T
 ;	L ')L#44 .V	*9+;+;<GGLL)2G2GH	77>>),RZZ	5J1Kq1P IIR! W##D4#@ TZZ\*'',,w$vh@G$ ) 3 3 JH!ww||GX5F5FGHh- ,"*"2"2#,,55g>&-oo - < <cjj>S'G 

7+, Z()NOSTTO " 0 01B1B CA FI $7$>$>$@@ %++D1B1B,C,H,HIJ  !)g"56 777>>)4"MM)4

7I67 7V

7I6Y.V` ..&3&C&C,,'# ')#(7JKw;	L ;	L6, ,7 7 # V$KWUVe;	L ;	Lsj   E N	A,M	5ANANAM%
N0M&<NMNM#N&N	NN		NN)r=   N)r   r   r   r   r:   r   r/   r   r   r;   r'   r<   r%   staticmethodboolrF   classmethodrL   rN   r   r   rR   rd   tuplerl   r|   r5   r1   r   r   r   r   r   r9   r9   \   s    37Hht-./6FJ&A!BCJ )NE(
 
 
  
$ 
$ & & c 3 4   s 4F 4  8 B	t/0$s);	<B B % (.I)J K%	c% %N N.	|X&;<<	=N. N.` MLl MLx8M/N ML MLr   r9   )"rX   r#   loggingr   r   r   pathlibr   typingr   torch._dynamo.utilsr   r   r   torch._utils_internalr   torch.utils._filelockr	   runtime.runtime_utilsr   utilsr   r   	getLoggerr   rH   	dataclassr   r   r'   r-   r1   r5   r9   r   r   r   <module>r      s5      	     G G 1 * 3 3 g! d#
 
 $
 d#3 3 $3 d#  $ d#* * $* d#0 0 $0 d#9 9 $9}L }Lr   