
    rhM                         d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
Zd Zd Z ed       G d d             Z G d de      Zy)    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     d S )Nc                      y)N   r   r    )lhs_typerhs_types     o/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py<lambda>z"get_min_dot_size.<locals>.<lambda>   s        r   r   s    r   get_min_dot_sizer      s
     0/r   c                 t    t         j                  j                  | dk(  S t         j                  j                  S Ngfx942)r
   r	   use_block_pingpongarchs    r   is_pingpong_schedule_enabledr$      s+    !&!=!=!EDHg599KgKggr   c                 t    t         j                  j                  | dk(  S t         j                  j                  S r   )r
   r	   use_in_thread_transposer"   s    r   is_in_thread_transpose_enabledr'      s.    !&!B!B!JDHqPUPYPYPqPqqr   T)frozenc                   R   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	e
ed	<   d
Zeed<   dZeed<   dZeed<   dZeed<   dZee   ed<   dZee   ed<   dZeed<   dZee   ed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   d Zeed!<   d"Zeed#<   d$ Zd% Z y)&
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr#   )fp8e5supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r8   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneschedule_hintc                 8   t        | j                  dd       }|dk\  rdnd}t        j                  | d|       | j                  dkD  r| j                  | j                  dz
  z  dk(  sJ d	       | j                  d
k(  r| j
                  dk(  sJ d       t        t              j                  dz  }| j                  i nt        | j                        }dD ]  }t        || dz        ||<    t        j                  | dt        |j                                      y )N   
       @   	warp_sizer   r   znum_warps must be a power of 2gfx950zgfx950 only accepts kpack == 1lib)ocmlocklz.bcr1   )intr#   object__setattr__r,   r>   r   __file__parentr1   dictstrtupleitems)self	gfx_majorrK   default_libdirr1   rM   s         r   __post_init__zHIPOptions.__post_init__E   s   		!B(	#r/Br	4i8~~!t~~!9K'LQR&R 	0/	0R 99 ::?D$DD?h..6 ,,4b$t?O?O:P# 	AC">se3K#?@K	A4k6G6G6I0JKr   c           	          dj                  | j                  j                         D cg c]  \  }}| d|  c}}      }t        j                  |j                  d            j                         S c c}}w )N_-utf-8)join__dict__rX   hashlibsha256encode	hexdigest)rY   namevalkeys       r   hashzHIPOptions.hashU   s]    hh9L9L9NOID#4&#OP~~cjj12<<>> Ps   A4
)!__name__
__module____qualname__r,   rP   __annotations__r-   r/   r0   r1   rU   r2   rW   r3   boolr4   r#   rV   r6   r   r7   r9   r:   r;   r<   r=   r>   r?   r@   rB   rD   r\   rj   r   r   r   r*   r*      s    IsL#JHcK#L%#E4"t"D#'2%*246%uSz6'--/9 %*9!d!$)T) !#!E3N$$)*!3*L#"  M3L ?r   r*   c                   R    e Zd Zedefd       Zdeddf fdZdefdZde	fdZ
d Zd	 Zdeeef   fd
Zd Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zd Z ej:                         d        Z xZS )
HIPBackendr   c                      | j                   dk(  S )NrA   )backendr   s    r   supports_targetzHIPBackend.supports_target\   s    ~~&&r   returnNc                 j    t         |   |       t        |j                  t              sJ d| _        y )Nhsaco)super__init__
isinstancer#   rV   
binary_ext)rY   r   	__class__s     r   ry   zHIPBackend.__init__`   s+     &++s+++!r   c                      d|j                    S )Nhip:r"   rY   optionss     r   get_target_namezHIPBackend.get_target_namee   s    gll^$$r   c                    dt         j                  j                  xs | j                  j                  i}| j                  j                  dk(  rBt        t        j                        }|j                  dh       t        t        |            |d<   d|vrt        t        j                        }| j                  j                  dk(  r|j                  h d       nX| j                  j                  dk(  r|j                  dd	h       n+d
| j                  j                  v r|j                  dd	h       t        t        |            |d<   d|vrt         j                  j                  |d<   |j                  t        j                  j                         D ci c]  }||v r||   |||    c}       t        di |S c c}w )Nr#   r    tf32r:   r6   >   fp8e4b8fp8e4nvfp8e5b16rL   r   r5   gfx12r;   r   )r
   runtimeoverride_archr   r#   setr*   r:   updaterW   sortedr6   languagedefault_fp_fusion__dataclass_fields__keys)rY   optsargsr:   r6   ks         r   parse_optionszHIPBackend.parse_optionsh   s   33Gt{{7G7GH ;;x'+.z/V/V+W((//938@\9]3^D/0!-#&z'F'F#G {{8+$++,NO!!X-$++Y,@ADKK,,,$++Y,@A+08L1M+ND'(T)',~~'G'GD#$)H)H)M)M)O ;AT	d1g&9 QZ ; 	<!D!!;s   F=c                     |j                   |j                  |j                  |j                  d   |j                  d   |j                  d   fS )Nr   r   r.   )r,   r0   sharedr2   )rY   metadatas     r   pack_metadatazHIPBackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r   c                 0    dt        | j                        iS )Nmin_dot_size)r   r   r   s     r   get_codegen_implementationz%HIPBackend.get_codegen_implementation   s     0 =>>r   c                     ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rY   r   s     r   get_module_mapzHIPBackend.get_module_map   s    719==r   c                 .    t        j                  |       y N)r	   load_dialects)rY   ctxs     r   r   zHIPBackend.load_dialects   s    #r   c                     dd l }d}t        | d      r| j                         |k  S t        | |j                        r-t        | d      r!| j                         j                         |k  S y)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   rz   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbzHIPBackend.is_within_2gb   s]    
3$==?j00c5<<(WS:K-L&&(--/:==r   c                 H    t        j                  |       }d| v r|ddggz  }|S )NSztt.pointer_rangerI   )r   
parse_attr)descrets     r   r   zHIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     t        j                  | |fi |}t        j                  j                  r|dk(  rt
        j                  |       r|dz  }|S )Ntensorr   )r   get_arg_specializationr
   r	   use_buffer_opsrq   r   )r   tykwargsr   s       r   r   z!HIPBackend.get_arg_specialization   sJ    00bCFC 99##h:;S;STW;X3JC
r   c                  Z   t         j                  j                  } | t        |       }|j	                         r|S t        t
              j                  dz  }|j	                         r|S t        d      }|j	                         r|S t        d      }|j	                         r|S t        d      )Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r
   r	   lld_pathr   is_filerS   rT   	Exception)lld_env_pathllds     r   path_to_rocm_lldzHIPBackend.path_to_rocm_lld   s     yy))#|$C{{}
8n##&77;;=J./;;=J$%;;=Jqrrr   c                    t        j                  | j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j                  j!                  |       |j#                  |        | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirzHIPBackend.make_ttir   s    __S[[)
!!"%..r2<<R@''+#))"-b!##B'$$R(##B'
s
r   c                    t        j                  | j                        }|j                          t        j
                  j                  |d|j                   |j                  |j                  |j                         |j                  |        t        j                  | j                        }|j                          t        j                  j                  |       t        j                  j                  |       t        j                  j                  |       t         j                  j                  j#                  ||j                  |j$                  |j&                         t        j                  j                  |       t         j                  j                  j)                  |       t        j                  j+                  |d       t         j                  j                  j-                  |       t        j                  j/                  |       t        j0                  j3                  |       t        j
                  j5                  |       t        j0                  j3                  |       t6        j                   j8                  }t6        j                   j:                  }t6        j                   j<                  }|j>                  dk(  rdx}}t         j                  j                  jA                  ||jB                  |||       |r4t         j                  j                  jE                  ||j                         t        j0                  j3                  |       |j>                  jG                         dk7  r4t         j                  j                  jI                  ||j>                         t        j                  j+                  |d       t        j                  j                  |       t        j                  jK                  |       tM        |j                        rHt         j                  j                  jO                  |       t        j                  j                  |       t         j                  j                  jQ                  |       tS        |j                        }|rC|jB                  dk(  r4t         j                  j                  jU                  ||jB                         t6        j                   jV                  r|t         j                  j                  jY                  |       t        j0                  j3                  |       t         j                  j                  j[                  ||j                         t         j                  j                  j]                  |       t        j0                  j3                  |       t        j0                  j_                  |       t        j0                  ja                  |       |r4t         j                  j                  jc                  ||j                         |j                  |        | S )Nr~   Tzlocal-prefetchr   rC   r.   )2r   r   r   r   r   r   add_convert_to_ttgpuirr#   r,   rK   r0   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulr=   r>   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   global_prefetchlocal_prefetchuse_async_copyrD   add_stream_pipeliner/   add_coalesce_async_copylowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr'   add_in_thread_transposeadd_reorder_instructionsr$   add_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   r   r   r   r   r   r!   s           r   
make_ttgirzHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00W\\7C_C_ahanano44R8

00400T:

77;,,R0''+##B'''+))331111   $44/00On

..r73E3EXfhvwJJ66r7<<H''+  &&(F2JJ==b'BWBWX00T:44R82226)',,7JJ66r:NN88<

33B79',,G'"4"4"9JJ11"g6H6HI99##JJ88<MM++B/JJ88W\\J

--b1''+b!$$R(JJ::2w||L
s
r   c                    | }t        j                  |j                        }|j                          t        j
                  j                  |       t        j                  j                  |       t        j                  j                  |       t        j
                  j                  |       t        j
                  j                  |       |j                  |       |S r   )r   r   r   r   r   r   r   r   add_sccpr   add_loop_aware_cser    add_combine_tensor_select_and_ifr   )srcr   r   r   r   s        r   	ttgir_optzHIPBackend.ttgir_opt  s    __S[[)
""2&r"&&r*((,77;
s
r   c                 $   | }t        j                  |j                        }|j                          d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j                  |       t
        j                  j                  |       d}t        j
                  j                  j                  ||j                  |       t
        j                  j                  |       t
        j                  j!                  |       t
        j                  j#                  |       t
        j                  j%                  |       t
        j                  j                  |       t
        j                  j!                  |       t
        j                  j'                  |       |j(                  j+                         dk7  r?t        j
                  j                  j-                  ||j                  |j.                         t0        j2                  j4                  st
        j6                  j9                  |       t        j
                  j                  j;                  ||       |j=                  |       t?        j@                          t?        j                         }t?        jB                  ||      }t	        jD                  |       d}	t0        j2                  jF                  rd}	t?        jH                  |t        jJ                  |j                  |	       t	        jL                  ||j                         t	        jN                  |d       t	        jP                  |dd       t	        jP                  |d	d       t	        jP                  |d
d       t	        jP                  |d|jR                  dk(         |jU                         D 
cg c]  }
|
jW                         r|
 }}
|d   jY                  t        jZ                         |d   j]                  dd|j^                  |jR                  z          |d   j]                  d|j`                          |jb                  rdnd}|d   j]                  d|       t0        j2                  jF                  r'|d   je                  d       |d   jg                          t	        jh                  |d          t0        j2                  jF                  r\tk        tl              jn                  dz  }tq        |dz        tq        |dz        tq        |dz        g}t?        jr                  ||       nW|jt                  rK|jt                  D cg c]  \  }}t	        jv                  ||      s|  }}}t?        jr                  ||       t?        jx                  |t>        jz                  |j                  dg |j|                         t0        j                  j~                  rt	        j                  |d          | j                  d      |d<   t	        j                  |       t	        j                  |       tq        |      S c c}
w c c}}w )Nr   TrC    +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rJ   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr8   zdenormal-fp-math-f32rM   z
asanrtl.bczocml.bczockl.bcz
ttg.sharedr   )Dr   r   r   r   r	   r   r   add_optimize_lds_usager#   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rD   r   lower_instruction_sched_hintsr/   r
   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrK   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr,   r-   r?   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rS   rT   rV   link_extern_libsr1   need_extern_liboptimize_moduleOPTIMIZE_O3r;   scalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moder[   pathsrg   paths                    r   	make_llirzHIPBackend.make_llir#  s   __S[[)
 

11"gllOT$$R(**2.11"5 	

((W\\9E''+b!''+**2.''+b!$$R(  &&(F2JJ<<RwOaOab  22MM&&r*

55b)D
s 	,,.>>#w/  *((&Ox):):GLL/Z 	Hgll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224PbB<M<M<OrPPA > >?A8Bw?P?PQXQbQb?b>c:de 	A0W5I5I4JL+2+E+E6A1=A((F((2F##%
 	  Q(((!(^22U:NN\12NY./NY./E
 !!(E2  .5.A.AiltTSEXEXYacgEhTiEi!!(E2Xt'7'7r2wOgOgh99**33CF; !--l;$$X. 	  *8}_ Q@ js   8ZZZ3Zc           	         t        j                  d|       }t        |      dk(  sJ |d   |d<   g }|j                  dk(  r|j	                  d       t        j                  | t        j                  |j                  d||j                  d      }t        j                  j                  rt        d	       t        |       |S )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rg   	attentionzsink-insts-to-avoid-spillsr   Fz!// -----// AMDGCN Dump //----- //)refindalllenrD   appendr   translate_to_asmr	   r  r#   r;   r
   dump_amdgcnprint)r   r   r   namesflagsamdgcns         r   make_amdgcnzHIPBackend.make_amdgcn  s    
 

QSVW5zQ 8
   K/LL56&&sC,=,=w||RQVX_XpXprwx99  56&Mr   c                    d}t         j                  j                  rd}t        j                  | |j
                  |      }t        j                         }t        j                         5 }t        j                         5 }t        |j                  d      5 }|j                  |       d d d        t        j                  |ddd|j                  d|j                  g       d d d        t        |j                  d      5 }	|	j                         }
d d d        d d d        
S # 1 sw Y   zxY w# 1 sw Y   NxY w# 1 sw Y   +xY w# 1 sw Y   
S xY w)	Nr   r   wbz-flavorgnuz-sharedz-orb)r
   r  r  r	   assemble_amdgcnr#   rq   r   tempfileNamedTemporaryFileopenrg   write
subprocess
check_callread)r   r   r   r'  rw   	rocm_pathtmp_outtmp_infd_infd_outr   s              r   
make_hsacozHIPBackend.make_hsaco  s    ((&O##CG//1	((* 	$g,,. q&&++t, 'KK&'%%y)UIv{{\`bibnbn&opq gllD) $Vkkm$	$ 
' 'q q$ $	$ 
sT   (E=D+D&8D+E<D7ED($D++D4	0E7E 	<EEc                      |t         j                  k(  r fd|d<    fd|d<   n|t         j                  k(  r	 fd|d<    fd|d<    fd|d	<    fd
|d<   y )Nc                 *    j                  | |      S r   )r   r   r   r   rY   s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#xQX3Y r   r   c                 *    j                  | |      S r   )r   rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DOOCSZ4[ r   ttgirc                 *    j                  | |      S r   )r   rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    DNN3RY4Z r   c                 *    j                  | |      S r   )r-  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    t~~c8W/U r   llirc                 *    j                  | |      S r   )r:  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    1A1A#xQX1Y r   r9  c                 *    j                  | |      S r   )rL  rO  s     r   r   z'HIPBackend.add_stages.<locals>.<lambda>  s    Xw0W r   rw   )r   TRITONGLUON)rY   stagesr   r   s   ` ` r   
add_stageszHIPBackend.add_stages  sR    x&YF6N[F7O'ZF7OUvYxWwr   c                 z    t        j                  t        j                         dgd      }| d| j                   S )Nz	--versionr`   )encodingr_   )rD  check_outputrq   r   r   )rY   versions     r   rj   zHIPBackend.hash  s8    )):+F+F+H+*Vahi!DKK=))r   ) rk   rl   rm   staticmethodr   rt   ry   rV   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r-  r:  rL  rZ  	functools	lru_cacherj   __classcell__)r|   s   @r   rq   rq   Z   s^   '	 ' '"y "T "
%# %"S "2
?>S*_ 5 >
       s s&    ; ;z   k kZ  *   X Y* *r   rq   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rc   r@  r0  rD  r`  pathlibr   r   r$   r'   r*   rq   r   r   r   <module>rj     sw    E E 5 5  ! # #    	   0Y 0hr $9? 9? 9?xk* k*r   