
    rhz                       d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ  ed	      Z ed
      Z G d de      Z G d de	e         Zy)    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                       e Zd Z fdZ xZS )IncompatibleTypeErrorImplc                    || _         || _        d| j                   j                         z   dz   | j                  j                         z   | _        t        t
        |   | j                         y )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      k/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/triton/language/semantic.pyr   z"IncompatibleTypeErrorImpl.__init__   sX    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )__name__
__module____qualname__r   __classcell__)r   s   @r   r   r      s    F Fr   r   c                  R   e Zd ZU ej                  Zded<   eZded<   d ZdydZdydZ	dzdZ
	 	 	 	 d{d	Zd|d}d
Zd~dZ	 	 d	 ddZddZ	 	 	 	 ddZ	 	 	 	 ddZ	 	 	 	 ddZddZddZddZddZddZddZddZddZddZddZddZddZddZddZ ddZ!dd Z"dd!Z#dd"Z$dd#Z%dd$Z&dd%Z'dd&Z(dd'Z)dd(Z*dd)Z+dd*Z,dd+Z-d,d-dd.Z.dd/Z/dd0Z0dd1Z1dd2Z2dd3Z3dd4Z4dd5Z5dd6Z6dd7Z7dd8Z8dd9Z9dd:Z:dd;Z;dd<Z<ddd=Z=d> Z>d? Z?d@ Z@dA ZAdB ZBdC ZCdD ZDdE ZEdF ZF	 	 	 	 	 	 	 	 	 	 ddGZG	 	 	 	 ddHZHddIZIddJZJddKZKdL ZLdM ZMddNZNddOZOddPZPddQZQddRZRddSZSddTZTdU ZUdV ZV	 	 	 	 ddWZWddXZX	 	 	 	 ddYZYddZZZdd[Z[dd\Z\dd]Z]dd^Z^dd_Z_dd`Z`ddaZadb Zb	 	 	 	 	 	 ddcZcdddZdddeZe	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddfZfddgZgdh ZhddiZi	 	 	 	 ddjZjddkZkddlZlddmZmddnZnddoZoddpZpddqZqddrZrddsZsdt Ztd|duZuddvZvddwZw	 	 	 	 	 	 	 	 	 	 ddxZxy,)TritonSemanticzType[TensorTy]tensorz
ir.builderbuilderc                    || _         y N)r'   )r   r'   s     r   r   zTritonSemantic.__init__   s	    r   c                    |dvrt        d|       | j                  | j                  j                  |      t        j
                        S )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr&   r'   create_get_program_idtlint32r   axiss     r   
program_idzTritonSemantic.program_id&   sA    y J4&QRR{{4<<==dCRXXNNr   c                    |dvrt        d|       | j                  | j                  j                  |      t        j
                        S )Nr+   z-num_programs axis must be 0, 1, or 2 but got )r,   r&   r'   create_get_num_programsr.   r/   r0   s     r   num_programszTritonSemantic.num_programs+   sA    y LTFSTT{{4<<??ErxxPPr   c                `   |j                   }|j                   }|j                  }|j                  }||k(  r	||kD  r|S |S |t        j                  j                  j
                  k(  r	||k\  r|S |S |t        j                  j                  j
                  k(  r	||k\  r|S |S t        d| d|       )Nzunexpected signedness r   )int_bitwidthint_signednessr.   dtype
SIGNEDNESSUNSIGNED	TypeError)r   a_tyb_tya_rankb_ranka_snb_sns          r   integer_promote_implz#TritonSemantic.integer_promote_impl4   s    """""""" 4<!F?444RXX((111!V+455RXX((111!V+4550eD6BCCr   c                   ||k7  rx|r||fn||f\  }}|j                         j                  |j                         j                  k  r6|r2|t        j                  t        j                  fv rt        j
                  S |S |j                         s|j                         rt        j                  S |j                         s|j                         rt        j
                  S |j                         s|j                         r"|rt        j
                  S t        j                  S |j                         r2|j                         r"|rt        j
                  S t        j                  S |j                         s|j                         rt        j
                  S |j                         r'|j                         r||k(  r|S t        j                  S |j                         r|j                         st        d| d|       |rL|j                  |j                  k7  r3t        d|j                         z   dz   |j                         z   dz         | j!                  ||      S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer.   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr<   r8   r   rC   )r   r=   a_is_scalarr>   b_is_scalar
div_or_mod	scalar_ty	tensor_tys           r   computation_type_implz$TritonSemantic.computation_type_implC   s   
 +%3>D$<T4L Iy~~%%)9)?)??9R[[0I#I::%   <<>T\\^:: <<>T\\^:: <<>T\\^zz!zz!<<>dllnzz!{{"<<>T\\^::;;=T[[]4<47RZZ7{{}DKKM.tfE$@AA $--1D1DD9DMMOKgUX\XeXeXggoo p p ((t44r   c                   t        |t              r9| j                  | j                  j	                  |      t
        j                        S t        |t              rd|cxk  rdk  rn nt
        j                  }nld|cxk  rdk  rn nt
        j                  }nMd|cxk  rdk  rn nt
        j                  }n.d|cxk  rdk  rn nt
        j                  }nt        d| d      | j                  ||	      S t        |t              rrd
}dddz  z  }t        d   |      }|t        d      k(  s|dk(  s||k7  s||cxk  r|k  rn nt
        j                   }nt
        j"                  }| j                  ||	      S t        |t
        j$                        r| j'                  |j(                        S t        || j                        r|S |rt+        d| dt-        |       d      |S )N           l                             l            zNonrepresentable integer .r9   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstanceboolr&   r'   get_int1r.   int1intr/   uint32int64uint64r,   scalar_constantfloat__builtins__rK   rM   	constexpr	to_tensorrH   r<   type)r   x
check_typer9   min_float32max_float32abs_xs          r   ro   zTritonSemantic.to_tensoru   s   a;;t||44Q7AA3"U"!#e#		1$u$!#e#		 #<QCq!ABB'''775!!K%C/K '*Ee$|Ave2{2



'''772<<(>>!''**4;;'HoaS	$q'*MNNr   c                    |j                         rL|st        ||      |j                         r||k7  rt        ||      |j                         rt        ||      y y r)   )is_ptrr   is_floating)r   r   r   allow_ptr_as       r   check_ptr_type_implz"TritonSemantic.check_ptr_type_impl   s[    ==?/??}}Ff$4/??!!#/?? $ r   c                &   t        |t        j                        }t        |t        j                        }|r|}	| j                  |      }|r|}
| j                  |      }|j                  j
                  }|j                  j
                  }| j                  |||       | j                  |||       |rF|j                         s5|j                         s$| j                  |||||      }|r	dk  r|j                         s|r 
dk  r|j                         rt        d      |j                         rx|r:|j                         	cxk  r|j                         k  sn t        d|	 d|       |r:|j                         
cxk  r|j                         k  sn t        d|
 d|       |r| j                  	|      n| j                  ||      }|r| j                  
|      n| j                  ||      }| j!                  ||      \  }}||fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type r_   )rc   numbersNumberro   rp   scalarrz   rw   rX   is_int_unsignedr,   rR   get_int_min_valueget_int_max_valuerk   castbroadcast_impl_value)r   lhsrhsallow_lhs_ptrallow_rhs_ptrarithmetic_checkrU   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_implz+TritonSemantic.binary_op_type_checking_impl   s    #37"37J..%CJ..%C XX__
XX__
  ZG  ZGJ$5$5$7
@Q@Q@S33Jz[hjtuJ*q.Z5O5O5Q$aJ<V<V<X  "K L L  " **F*F*HJ +I*4*F*F*H+I$wzl:TU_T`%abb **F*F*HJ +I*4*F*F*H+I$wzl:TU_T`%abbHU$&&z&D[_[d[dehjt[uCHU$&&z&D[_[d[dehjt[uC ,,S#6SCxr   c                *   |j                   j                  j                  dk\  s | j                  j                  j
                  sy |j                   j                  }|j                   j                  }||k(  sJ |j                         sJ | j                  |t        j                        }| j                  |t        j                        } |||d      }|j                         }| j                  |t        j                        }|j                         }| j                  |t        j                        }| j                  | j                  ||      | j                  ||            }	d|j                   d|j                    }
| j#                  |	|
       y )N@   Frg   z! overflow detected for operation )rp   r~   r7   r'   optionssanitize_overflowrR   r   r.   ri   r   rk   r   and_
less_equalgreater_equalr    device_assert)r   r   r   	binary_opr   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_implz/TritonSemantic.binary_op_sanitize_overflow_impl   s?   88??''2-T\\5I5I5[5[XX__
XX__
Z'''  """iiRXX&iiRXX&S%(002	((BHH=	002	((BHH=	yyi8$:L:LSR[:\]J++,,MiN`N`Mab4%r   c                0   | j                  ||dd      \  }}|j                  j                  }|j                  j                  }|j                         r|j                         rt	        d      |j                         r@|j                         s0||}}|j                  j                  }|j                  j                  }|j                         r|j
                  }|j                  j                         r|j                  j                  dk  ri|j                  j                  t        j                        j                  | j                        }| j                  j                  |j
                  |d      }| j                  | j                  j!                  |j
                  |      |j                        S |j#                         rJ| j                  | j                  j%                  |j
                  |j
                        |j                        S |j'                         ri|r| j)                  ||| j*                         | j                  | j                  j-                  |j
                  |j
                        |j                        S t	        d|       )NTzcannot add pointers togetherr   FrE   )r   rp   r~   rw   r<   handler9   r   r7   with_element_tyr.   ri   to_irr'   create_int_castr&   create_addptrrx   create_faddrR   r   add
create_add)r   inputotherr   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   zTritonSemantic.add   s   88tTRu**++**++!!#(>(>(@:;; !!#O,B,B,D %5E#jj//O#jj//O!!# <<L{{**,1I1IB1N33BHH=CCDLLQ#||;;ELL&RWX;;t||99%,,UW\WaWabb((*;;t||77ellSUZU_U_``##% 55eUDHHM;;t||66u||U\\RTYT^T^__*?*;<==r   c                   | j                  ||dd      \  }}|j                  j                  }|j                         r#| j	                  || j                  |      d      S |j                         rJ| j                  | j                  j                  |j                  |j                        |j                        S |j                         ri|r| j                  ||| j                         | j                  | j                  j                  |j                  |j                        |j                        S t        d|       )NTF)r   rE   )r   rp   r~   rw   r   minusrx   r&   r'   create_fsubr   rR   r   sub
create_subr<   r   r   r   r   rV   s        r   r   zTritonSemantic.sub   s    88tUSuJJ%%	88E4::e#48NN  ";;t||77ellSUZU_U_`` 55eUDHHM;;t||66u||U\\RTYT^T^__*9+677r   c                   | j                  ||      \  }}|j                  j                  }|j                         rJ| j	                  | j
                  j                  |j                  |j                        |j                        S |j                         ri|r| j                  ||| j                         | j	                  | j
                  j                  |j                  |j                        |j                        S t        d|       NrE   )r   rp   r~   rx   r&   r'   create_fmulr   rR   r   mul
create_mulr<   r   s        r   r   zTritonSemantic.mul  s    88FuJJ%%	  ";;t||77ellSUZU_U_`` 55eUDHHM;;t||66u||U\\RTYT^T^__*9+677r   c                   | j                  ||dddd      \  }}|j                  j                  }|j                  j                  }|j                         r$|j	                         r| j                  ||      }n|j	                         r#|j                         r| j                  ||      }n|j	                         rQ|j	                         rA| j                  |t        j                        }| j                  |t        j                        }nm|j                         rO|j                         r?|j                  |j                  kD  r| j                  ||      }n!| j                  ||      }nt        d|       | j                  | j                  j                  |j                  |j                        |j                        S NFTrE   )r   rp   r~   rx   rR   r   r.   rK   fp_mantissa_widthr<   r&   r'   create_fdivr   )r   r   r   r   r   s        r   truedivzTritonSemantic.truediv  s_   88ueUY[_`u**++**++&&(_-C-C-EIIe_5E##%/*E*E*GIIe_5E##%/*@*@*BIIeRZZ0EIIeRZZ0E((*/J/J/L00?3T3TT		%9		%9 ..?@AA{{4<<33ELL%,,OQVQ[Q[\\r   c                   | j                  ||dddd      \  }}|j                  j                  }|j                  j                  }|j                         r|j                         r| j	                  ||      }| j                  ||      }| j                  ||      }|j                         rJ| j                  | j                  j                  |j                  |j                        |j                        S | j                  | j                  j                  |j                  |j                        |j                        S t        d|       r   )r   rp   r~   rR   rC   r   is_int_signedr&   r'   create_sdivr   create_udivr<   )r   r   r   r   r   ret_tys         r   floordivzTritonSemantic.floordiv7  s   88ueUY[_`u**++**++!!#(>(>(@..PFIIeV,EIIeV,E##%{{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*?*;<==r   c                z   |j                   j                  }|j                   j                  }|j                         r|j                         st        d      | j	                  ||dddd      \  }}| j
                  j                  |j                  |j                        }| j                  ||j                         S )Nz4both operands of fdiv must have floating scalar typeFT)	rp   r~   rx   r<   r   r'   r   r   r&   )r   r   r   ieee_roundingr   r   r   s          r   fdivzTritonSemantic.fdivE  s    **++**++**,O4O4O4QRSS88ueUZ\`aull&&u||U\\B{{3

++r   c                \   | j                  ||dddd      \  }}|j                  j                  }|j                  j                  }|j                         rJ| j	                  | j
                  j                  |j                  |j                        |j                        S |j                         r|j                  |j                  k7  r3t        d|j                         z   dz   |j                         z   dz         |j                         rJ| j	                  | j
                  j                  |j                  |j                        |j                        S | j	                  | j
                  j                  |j                  |j                        |j                        S t        d|       )NFTzCannot mod z by rF   rE   )r   rp   r~   rx   r&   r'   create_fremr   rR   r8   r<   r   r   create_sremcreate_urem)r   r   r   rV   r   s        r   modzTritonSemantic.modN  s`   88ueUY[_`uJJ%%	**++  ";;t||77ellSUZU_U_``''?+I+II	0B0B0D Dv MP_PhPhPj j ns !s t t &&({{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*9+677r   c                   | j                  ||      \  }}|j                  }|j                         r|t        j                  j
                  k(  rJ| j                  | j                  j                  |j                  |j                        |j                        S |t        j                  j                  k(  rJ| j                  | j                  j                  |j                  |j                        |j                        S t        d|       |j                         rJ| j                  | j                  j                  |j                  |j                        |j                        S |j!                         rJ| j                  | j                  j#                  |j                  |j                        |j                        S t%        d|       NzUnexpected propagate_nan Unexpected dtype )r   r9   rx   r.   PropagateNanALLr&   r'   create_minimumfr   rp   NONEcreate_minnumfr,   r   create_minsir   create_minuir<   r   rq   ypropagate_nanr9   s        r   minimumzTritonSemantic.minimume  M   00A61 3 33{{4<<#?#?!((#SUVU[U[\\"//"6"66{{4<<#>#>qxx#RTUTZTZ[[ #<]O!LMM  ";;t||88188LaffUU""$;;t||88188LaffUU/w788r   c                   | j                  ||      \  }}|j                  }|j                         r|t        j                  j
                  k(  rJ| j                  | j                  j                  |j                  |j                        |j                        S |t        j                  j                  k(  rJ| j                  | j                  j                  |j                  |j                        |j                        S t        d|       |j                         rJ| j                  | j                  j                  |j                  |j                        |j                        S |j!                         rJ| j                  | j                  j#                  |j                  |j                        |j                        S t%        d|       r   )r   r9   rx   r.   r   r   r&   r'   create_maximumfr   rp   r   create_maxnumfr,   r   create_maxsir   create_maxuir<   r   s        r   maximumzTritonSemantic.maximumv  r   r   c                   | j                  ||      \  }}| j                  ||      \  }}| j                  ||      \  }}|j                  }|j                         rV| j                  | j                  j                  |j                  |j                  |j                  |      |j                        S t        d| d      )Nr   z(. Only floating point clamp is supported)	r   r9   rx   r&   r'   create_clampfr   rp   r<   )r   rq   minmaxr   r9   s         r   clampzTritonSemantic.clamp  s    44S#>S221c:3221c:3;;t||99!((CJJPSPZPZ\ijlmlrlrss/w6^_``r   c                d   | j                  ||      \  }}|j                  j                  }|j                  j                  }|j                         r|j                         st	        ||      | j                  ||      }||k7  r| j                  ||      }||k7  r| j                  ||      }||fS r)   )r   rp   r~   rR   r   rC   r   )r   r   r   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_implz,TritonSemantic.bitwise_op_type_checking_impl  s    88Fuzz((zz((""$L,?,?,A+L,GG..|\J
%IIeZ0E%IIeZ0Ee|r   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   
create_andr   rp   r   r   r   s      r   r   zTritonSemantic.and_  I    99%Gu{{4<<225<<NPUPZPZ[[r   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   	create_orr   rp   r   s      r   or_zTritonSemantic.or_  sF    99%Gu{{4<<11%,,MuzzZZr   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   
create_xorr   rp   r   s      r   xor_zTritonSemantic.xor_  r   r   c                   |j                   j                         s | j                  |t        j                        }|j                   j                         s | j                  |t        j                        }| j                  ||      S r)   )rp   is_int1bitcastr.   rf   r   r   s      r   logical_andzTritonSemantic.logical_and  s[    zz!!#LL0Ezz!!#LL0Eyy&&r   c                   |j                   j                         s | j                  |t        j                        }|j                   j                         s | j                  |t        j                        }| j                  ||      S r)   )rp   r   r   r.   rf   r   r   s      r   
logical_orzTritonSemantic.logical_or  s[    zz!!#LL0Ezz!!#LL0Exxu%%r   c                    |j                   j                         s | j                  |t        j                        }| j                  |      S r)   )rp   r   r   r.   rf   invertr   r   s     r   not_zTritonSemantic.not_  s5    zz!!#LL0E{{5!!r   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   create_lshrr   rp   r   s      r   lshrzTritonSemantic.lshr  I    99%Gu{{4<<33ELL%,,OQVQ[Q[\\r   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   create_ashrr   rp   r   s      r   ashrzTritonSemantic.ashr  r  r   c                    | j                  ||      \  }}| j                  | j                  j                  |j                  |j                        |j
                        S r)   )r   r&   r'   
create_shlr   rp   r   s      r   shlzTritonSemantic.shl  r   r   c                    |S r)    r   s     r   pluszTritonSemantic.plus  s    r   c                :   |j                   j                  }|j                         rt        d|j	                         z   dz         | j                  | j                  j                  |j                  | j                              |      }| j                  ||d      S )Nz$wrong type argument to unary minus ()T)
rp   r~   rw   r,   r   r&   r'   get_null_valuer   r   )r   r   r   _0s       r   r   zTritonSemantic.minus  s    zz(( ClF[F[F]]`ccdd[[44\5G5G5UVXdexxE4((r   c                X   |j                   j                  }|j                         s|j                         rt	        d|j                         z   dz         | j                  | j                  j                  |j                  | j                              |      }| j                  ||      S )Nz%wrong type argument to unary invert (r  )rp   r~   rw   rx   r,   r   r&   r'   get_all_ones_valuer   r   )r   r   r   _1s       r   r   zTritonSemantic.invert  s    zz(( L$<$<$>D|G\G\G^^addee[[889K9KDLL9YZ\hiyy##r   c                T    |j                   j                  t        j                        S r)   )rp   r   r.   rf   )r   vs     r   
_bool_likezTritonSemantic._bool_like  s    vv%%bgg..r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         r|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S | j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpOGTr   r  rR   r   create_icmpSGTcreate_icmpUGTr<   r   r   r   rV   s       r   greater_thanzTritonSemantic.greater_than     88FuJJ%%	  ";;t||::5<<VX\XgXghmXnoo&&({{4<<#>#>u||U\\#Z\`\k\klq\rss{{4<<#>#>u||U\\#Z\`\k\klq\rss*9+677r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         r|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S | j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpOGEr   r  rR   r   create_icmpSGEcreate_icmpUGEr<   r  s       r   r   zTritonSemantic.greater_equal  r  r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         r|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S | j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpOLTr   r  rR   r   create_icmpSLTcreate_icmpULTr<   r  s       r   	less_thanzTritonSemantic.less_than  r  r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         r|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S | j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpOLEr   r  rR   r   create_icmpSLEcreate_icmpULEr<   r  s       r   r   zTritonSemantic.less_equal  r  r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpOEQr   r  rR   create_icmpEQr<   r  s       r   equalzTritonSemantic.equal"      88FuJJ%%	  ";;t||::5<<VX\XgXghmXnoo;;t||99%,,UW[WfWfglWmnn*9+677r   c                   | j                  ||      \  }}|j                  j                  }|j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S |j                         rO| j	                  | j
                  j                  |j                  |j                        | j                  |            S t        d|       r   )r   rp   r~   rx   r&   r'   create_fcmpUNEr   r  rR   create_icmpNEr<   r  s       r   	not_equalzTritonSemantic.not_equal-  r.  r   N)r   c                  t        |t              rt        |t              st        d      t        |dz	        }t        |dz	        }|s|rt        d      ||k  rt        d      ||z
  }||dz
  z  dk7  rt        d      |g}|$t	        j
                  t        j                  |      }|j                  | j                        }| j                  | j                  j                  |||      |      S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)rc   rg   r,   rd   r.   
block_typer/   r   r'   r&   create_make_range)	r   startendr   is_start_int64is_end_int64rangeshape	ret_ty_irs	            r   arangezTritonSemantic.arange<  s    %%ZS-ANOOerk*C2I\788%<\]]eUQYA%BCC>]]288U3FLL.	{{4<<99)UCPRXYYr   c                
   |t        d      |dk(  r5| j                  j                  |j                  | j                              }n+t	        | j                  d|j
                         } ||      }| j                  ||      S )Nz2dtype must be specified when value is not a tensorr   get_)r,   r'   r  r   getattrnamer&   )r   rH   r9   get_value_fns       r   rk   zTritonSemantic.scalar_constantN  sq    =QRRA:LL//DLL0IJE"4<<4

|1DEL 'E{{5%((r   c                    t        |t        j                        r2|j                  j                  dk(  sJ d       | j                  ||      S | j                  ||      S )Nr   zonly accepts size-1 tensor)rc   r.   r&   numelrH   r   rk   )r   rH   r9   s      r   make_scalarzTritonSemantic.make_scalarY  sR    eRYY';;$$)G+GG)99UE**##E511r   c                F    | j                  | j                  ||      |      S r)   )splatrF  )r   r<  rH   r9   s       r   fullzTritonSemantic.full`  s     zz$**5%8%@@r   c                B   |j                   j                         rJ d       t        |      dk(  r|S t        j                  |j
                  |      }| j                  | j                  j                  |j                  | j                        |j                        |      S )NzCannot splat a block tensorr   )rp   is_blocklenr.   r5  r9   r&   r'   create_splatr   r   )r   rH   r<  r   s       r   rH  zTritonSemantic.splatg  sy    ::&&(G*GG(u:?Lu{{E2{{4<<44V\\$,,5OQVQ]Q]^`fggr   c                (   d}|D ]  }||z  }	 |j                   j                  |k7  rt        d      t        j                  |j                   j
                  |      }| j                  | j                  j                  |j                  ||      |      S )Nr   z:reshape() cannot change total number of elements in tensor)
rp   rE  r,   r.   r5  r~   r&   r'   create_reshaper   )r   r   	dst_shapecan_reorderrE  sr   s          r   reshapezTritonSemantic.reshapen  s     	AQJE	::u$YZZuzz00)<{{4<<66u||YP[\^deer   c                   |j                   D cg c]  }t        j                  |       }}|j                  |d       |j                  j                         s| j                  ||      S t        j                  |j                  j                  |      }| j                  | j                  j                  |j                  |      |      S c c}w )Nr   )r<  )r<  r.   _unwrap_if_constexprinsertrp   rK  rH  r5  r~   r&   r'   create_expand_dimsr   )r   r   r1   rq   rP  r   s         r   expand_dimszTritonSemantic.expand_dimsw  s    9>EAR,,Q/E	Eq!zz""$::e9:55uzz00)<{{4<<::5<<NPVWW Fs   Cc                X   |sJ d       t        |j                        dk(  sJ t        j                  |j                  j
                  |j                  d   |j                  d   z   g      }| j                  | j                  j                  |j                  |j                        |      S )Nz;current implementation of `cat` always may reorder elementsr   r   )
rL  r<  r.   r5  rp   r~   r&   r'   
create_catr   )r   r   r   rQ  ret_types        r   catzTritonSemantic.cat  s    YYY{399~"""==399Q<#))A,3N2OP{{4<<223::szzJHUUr   c                :   | j                  ||      \  }}|j                  g k(  }|r$| j                  |d      }| j                  |d      }t        |j                  d   t        j
                        rt	        j
                  d      }nd}|j                  |gz   }t	        j                  |j                  j                  |      }| j                  | j                  j                  |j                  |j                        |      }|r| j                  |dgd      }|S )Nr   r   FrQ  )r   r<  rX  rc   r.   rn   r5  rp   r~   r&   r'   create_joinr   rS  )r   ab
was_rank_1two	new_shaper[  r   s           r   joinzTritonSemantic.join  s    ((A.1 WW]
  A&A  A&Aaggbk2<<0,,q/CCGGseO	==	:kk$,,22188QXXFQ,,sQCU,;C
r   c                   t        |j                        dkD  sJ t        j                  |j                  d         dk(  sJ |j                  d d }t        j                  |j
                  j                  |      }| j                  j                  |j                        \  }}| j                  ||      | j                  ||      fS )Nr   r^  r   )rL  r<  r.   rU  r5  rp   r~   r'   create_splitr   r&   )r   ra  re  r[  outLHSoutRHSs         r   splitzTritonSemantic.split  s    AGGq ! ''49:9GGCRL	==	:22188<KK)KK)
 	
r   c                   t        |j                        t        |      k7  rt        d      t        d |D              t	        t        t        |                  k7  rt        d|       t        j                  |j                  j                  |D cg c]  }|j                  |    c}      }| j                  | j                  j                  |j                  |      |      S c c}w )Nz5permute dims must have the same length as input shapec              3  F   K   | ]  }t        j                  |        y wr)   )r.   rU  ).0ds     r   	<genexpr>z)TritonSemantic.permute.<locals>.<genexpr>  s     ;"))!,;s   !z?permute dims must be a permutation of 0, 1, ..., n-1, but were )rL  r<  r,   sortedlistr;  r.   r5  rp   r~   r&   r'   create_transr   )r   r   dimsro  r[  s        r   permutezTritonSemantic.permute  s    u{{s4y(TUU;d;;tE#d)DT?UU^_c^deff==!2!2T4RU[[^4RS{{4<<44U\\4H(SS 5Ss   C&
c                (   |j                   j                         s| j                  ||      S |j                   j                         }t	        |      t	        |      k7  rt        d| d|       ||k(  r|S t        |      D ]0  \  }}||   |k7  s|dk7  st        d||    d| d| d| d| 
       t        j                  |j                   j                  |      }| j                  | j                  j                  |j                  |      |      S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rp   rK  rH  get_block_shapesrL  r,   	enumerater.   r5  r~   r&   r'   create_broadcastr   )r   r   r<  	src_shapeiitemr   s          r   broadcast_impl_shapez#TritonSemantic.broadcast_impl_shape  s   zz""$::eU++JJ//1	y>SZ'@2eWUVVIL + 	@GAtQx4DAI #VW\]^W_V` aCCG& I%%&Cr)Bug"? @ @	@
 uzz00%8{{4<<88uMvVVr   c           	     x   |j                   }|j                   }|j                         r~|j                         sn|j                  |j                        }| j	                  | j
                  j                  |j                  | j
                        |j                        |      }||fS |j                         s~|j                         rn|j                  |j                        }| j	                  | j
                  j                  |j                  | j
                        |j                        |      }||fS |j                         r|j                         r|j                         }|j                         }t        |      t        |      k  rt        t        |      t        |            D ]  }| j	                  | j
                  j                  |j                  d      t        j                  |j                  dg|j                  z               }|j                   }|j                         } nt        |      t        |      k  rt        t        |      t        |            D ]  }| j	                  | j
                  j                  |j                  d      t        j                  |j                  dg|j                  z               }|j                   }|j                         } t        |      t        |      k(  sJ g }t!        |      D ]q  \  }	}
||	   }|
dk(  r|j#                  |       "|dk(  s||
k(  r|j#                  |
       >t%        dt'        |	      z   dz   t'        |
      z   dz   t'        |      z          ||k7  rVt        j                  |j                  |      }| j	                  | j
                  j)                  |j                  |      |      }||k7  rVt        j                  |j                  |      }| j	                  | j
                  j)                  |j                  |      |      }||fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rw  r   )rp   rK  r   r~   r&   r'   rM  r   r   rx  rL  r;  rW  r.   r5  valuesry  appendr,   strrz  )r   r   r   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaper|  leftrightr   s                r   r   z#TritonSemantic.broadcast_impl_value  s    ??V__%6++FMM:F++dll77T\\8RTWT^T^_aghCV CxS "v'8++FMM:F++dll77T\\8RTWT^T^_aghCN CxK __6??#4//1I//1I9~I.s9~s9~> :A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I	:
 Y#i.0s9~s9~> :A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I	:
 y>S^333I$Y/ e4!!19$$U+qjetm$$T*$ &136q6&:<@&ACFt9&MOV&WY\]bYc&d e ee I%v}}i@kk$,,"?"?

I"VX^_I%v}}i@kk$,,"?"?

I"VX^_Cxr   c                    |y |dk(  rt         j                  j                  S |dk(  rt         j                  j                  S t	        d| d      )NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNERTZr,   )r   rounding_modes     r   _str_to_rounding_modez$TritonSemantic._str_to_rounding_mode  sU     F"##(((E!##'''2=/Aqrssr   c                6   |j                   }|j                         r|j                  |j                        }||k(  r|S |j                  }|j                  }|j	                         s|j	                         r| j                  ||      S |j                  }|j                  }||k7  r&t        dt        |      z   dz   t        |      z         | j                  | j                  j                  |j                  |j                  | j                              |      S )Nz!Cannot bitcast data-type of size z to data-type of size )rp   rK  r   r~   rw   r   primitive_bitwidthr,   r  r&   r'   create_bitcastr   r   )r   r   dst_tysrc_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r   zTritonSemantic.bitcast  s    ??++FMM:FVL]]
]]
*"3"3"599UF++0000x@3x=P T2 247MB C C{{4<<66u||V\\RVR^R^E_`bhiir   c                   |j                   }|j                  }|j                  }||k(  r|S |j                         r|j                  |      }| j	                  |      }d}|j                         rf|j                         rV|j                  |j                  k  r=|t        j                  j                  }nH|t        j                  j                  k7  r+d}n(|&t        dt        |      z   dz   t        |      z         |j                         s|j                         rP| j                  j                  j                  d      	 J d        | j                  j                  d   ||||       S |j!                         r|j                         s"|j                         r|j!                         s|rP| j#                  | j                  j%                  |j&                  |j)                  | j                        |      |      S |j+                         r|j-                         r |j/                         r@|j-                         s0| j1                  | j1                  |t2        j4                        |      S |j                         xr+ |j                         xr |j                  |j                  kD  }|rO| j#                  | j                  j7                  |j&                  |j)                  | j                              |      S |j                         xr+ |j                         xr |j                  |j                  k  }	|	rO| j#                  | j                  j9                  |j&                  |j)                  | j                              |      S |j;                         r2|j;                         r!|j<                  |j<                  k7  s|j>                  |j>                  k7  r|jA                         xr |jC                          }
|jC                         rl|jD                  j)                  | j                        }| j#                  | j                  jG                  |      |jD                        }| jI                  ||      S | j#                  | j                  jK                  |j&                  |j)                  | j                        |
      |      S |jM                         r;|j;                         r*|jC                         rl|jD                  j)                  | j                        }| j#                  | j                  jG                  |      |jD                        }| jI                  ||      S |jA                         rO| j#                  | j                  jO                  |j&                  |j)                  | j                              |      S | j#                  | j                  jQ                  |j&                  |j)                  | j                              |      S |j;                         r|jM                         r|jC                         s|jA                         sO| j#                  | j                  jS                  |j&                  |j)                  | j                              |      S | j#                  | j                  jU                  |j&                  |j)                  | j                              |      S |jW                         r|j;                         r|j<                  }|dk(  rO| j#                  | j                  jY                  |j&                  |j)                  | j                              |      S |d	k(  rg| jI                  | j1                  |t2        jZ                        | j#                  | j                  j]                  d
      t2        jZ                              S |j;                         r_|jW                         rO| j#                  | j                  j_                  |j&                  |j)                  | j                              |      S |jW                         r_|jW                         rO| j#                  | j                  ja                  |j&                  |j)                  | j                              |      S J d| d|        )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.)	_semanticr   r   r   zcannot cast z to )1rp   r~   rK  r   r  rx   r  r   r  r  r,   r  is_fp8e4b15r'   codegen_fnsgetrQ   r&   create_fp_to_fpr   r   rO   rN   rP   r   r.   rK   create_fp_trunccreate_fp_extrR   r7   r8   r   is_boolr9   r  r2  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprw   create_ptr_to_intri   	get_int64create_int_to_ptrr  )r   r   r  fp_downcast_roundingr  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr  bitwidths                 r   r   zTritonSemantic.cast  s   ]]
]]
#L??++J7F  $99:NO#!!#
(>(> )
++j.K.KK#+BDTDTDYDY-A%)9)9)>)>>VZ@S#/  ":<?
O"LNi"j!$Z"1 2 2 ""$
(>(>(@<<++//&(/34 h5gh 4C4<<++,BCE6Sgswxx J$:$:$<""$):):)<;;,,U\\6<<;UWklntv v  ););)= ););)=99TYYubjj9:FF
 !,,. J""$J))J,I,II 	 ;;t||;;ELL&,,W[WcWcJdegmnn '') J""$J))J,I,II 	 ;;t||99%,,UYUaUaHbcekll :#4#4#6##z'>'>>*B[B[_i_x_xBx$224QZ=O=O=Q9QK!!#[[&&t||4[[!<!<R!@%++N~~eR00{{4<<#?#?fll[_[g[gNhju#v#)+ + **,1B1B1D!!#[[&&t||4[[!<!<R!@%++N~~eR00))+{{4<<#?#?fll[_[g[gNh#ikqrr{{4<<#?#?fll[_[g[gNh#ikqrr :#B#B#D!!#:+C+C+E{{4<<#?#?fll[_[g[gNh#ikqrr{{4<<#?#?fll[_[g[gNh#ikqrr :#4#4#6!..H2~{{4<<#A#A%,,PVP\P\]a]i]iPj#kmstt1}~~diirxx&@$++dllNdNdefNgikiqiqBrss :#4#4#6;;t||==ellFLLY]YeYeLfgiopp :#4#4#6;;t||::5<<VZVbVbIcdflmm8UG4x88ur   c                "   t         j                  j                  }|rr|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S |dk(  rt         j                  j
                  }|S t        d| d      |S )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGCVr,   r   cache_modifiercaches      r   _str_to_load_cache_modifierz*TritonSemantic._str_to_load_cache_modifier  s    !!&&&)),,   5()),,
 	  5()),,  !?>2B.!QRRr   c                d   t         j                  j                  }|r|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S |dk(  rt         j                  j
                  }|S |dk(  rt         j                  j                  }|S t        d| d      |S )Nz.wbr  z.csz.wtr  r  )r   r  r   WBr  CSWTr,   r  s      r   _str_to_store_cache_modifierz+TritonSemantic._str_to_store_cache_modifier  s    !!&&&)),,   5()),,   5()),,
 	  5()),,  !?>2B.!QRRr   c                    t         j                  j                  }|rQ|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S t        d| d      |S )N
evict_lastevict_firstzEviction policy r  )r   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr,   )r   eviction_policyevictions      r   _str_to_eviction_policyz&TritonSemantic._str_to_eviction_policy  su    %%,,,.--88
 	 !M1--99  !#3O3DN!STTr   c                    d }|rQ|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S t	        d| d      |S )NzeronanzPadding option r  )r   PADDING_OPTIONPAD_ZEROPAD_NANr,   )r   padding_optionpaddings      r   _str_to_padding_optionz%TritonSemantic._str_to_padding_option  sh    '++44
 	  5(++33  !?>2B.!QRRr   c                d   t         j                  j                  }|r|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S |dk(  rt         j                  j
                  }|S t        d| d      |S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr,   )r   
sem_optionsems      r   _str_to_semzTritonSemantic._str_to_sem  s    oo--Y&oo-- 
 y(oo-- 
 y(oo55
 
	 y(oo-- 
 !#3J<~!NOO
r   c                "   t         j                  j                  }|rr|dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S |dk(  rt         j                  j                  }|S t        d| d      |S )Ngpuctasysr  r  )r   MEM_SYNC_SCOPEGPUCTASYSTEMr,   )r   scope_optionscopes      r   _str_to_scopezTritonSemantic._str_to_scope  s    !!%%u$))--  &))--
 	 &))00  !#3L>!PQQr   c                ~   |rt        |d      s|g}|D cg c]*  }t        |t        j                        r|j                  n|, }}|D ]+  }t        |t
              rd|cxk  rt        |      k  r(J  J  t        |      dkD  sJ t        |      t        t        |            k(  sJ d       t        |      S yc c}w )N__iter__r   z'Duplicate dimension in `boundary_check`r
  )	hasattrrc   r.   rn   rH   rg   rL  setrq  )r   boundary_checkblock_shapeelemdims        r   _canonicalize_boundary_checkz+TritonSemantic._canonicalize_boundary_check  s    >:6"0!1aopY]JtR\\,JdjjPTTpNp% L!#s+S0K3{;K0KKK0KKKL~&***~&#c..A*BBmDmmB.)) qs   /B:c	           
        ||t        d      |j                  j                  j                  }	|	t        j                  k7  sJ d       |	j                         r(|t        j                  j                  k(  rt        d      |j                  j                  }
| j                  ||
j                               }| j                  | j                  j                  |j                  |||||      |
      S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r,   rp   
element_tyr.   rf   rR   r   r  r  r  rx  r&   r'   create_tensor_pointer_loadr   )r   ptrmaskr   r  r  r  r  is_volatileelt_tyr  s              r   _load_block_pointerz"TritonSemantic._load_block_pointer  s     u0jkk$$// X"XX ==?w"*;*;*C*CC_`` $$ ::>6KbKbKde {{LL33CJJPWY^`hjuv 	r   c	           
     t   |j                   j                  j                         s't        d|j                   j	                          d      ||t        d      |s|rt        d      |j                   j                         sN|r%|j                   j                         rt        d      |r%|j                   j                         rt        d      |j                   j                         rX|*| j                  ||j                   j                               }|*| j                  ||j                   j                               }|j                   j                  }	|	j                  }
|
t        j                  k(  }|rBt        j                  }
t        j                  |
|	j                        }	| j                  ||	      }|| j                  ||
      }|j                   j                         r|j                   j                  |
      }n|
}|9| j!                  | j"                  j%                  |j&                  |||      |      }nR| j!                  | j"                  j)                  |j&                  |j&                  |r|j&                  nd |||      |      }|r | j                  |t        j                        }|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rp   r~   rw   r,   r   rK  r~  rx  r  r.   rf   int8pointer_typeaddress_spacer   r   r&   r'   create_loadr   create_masked_load)r   r   r  r   r  r  r  r  r  ptr_tyr  r  r  r   s                 r   _load_legacyzTritonSemantic._load_legacy  sK   xx%%'4SXX5F5F5H4IWXX <E-HIIn X Y Y
 xx  "		**, !hii,,. !ijj 8800sxx7P7P7RS 11%9R9R9TU "" BGG#WWF__VV-A-ABF))C(C IIeV,E 88XX--f5F F <++dll66szz5(T_`bhiC++//

DKKY^dhjo08+GHNPC ))C)C
r   c	           
     D   | j                  |      }	| j                  |      }
| j                  |      }|j                  j	                         r<|j                  j
                  j                         r| j                  ||||||	|
|      S | j                  ||||||	|
|      S r)   )	r  r  r  rp   rw   r  rK  r  r  )r   r   r  r   r  r  r  r  r  r  r  r  s               r   loadzTritonSemantic.load2  s     00@//@--n=88??!4!4!=!=!?++CungW\^fhstt $$S$~wPUW_almmr   c                   t        |t        j                        sJ t        |j                        }t        |      |k(  sJ d| dt        |              | j                  |d      }| j                  j                  |j                  || j                  |      | j                  |            }| j                  ||j                        S )N	expected  offsets, but got Frequire_i64)rc   r.   tensor_descriptor_baserL  r  _convert_to_ir_valuesr'   create_descriptor_loadr   r  r  r&   r5  )r   descoffsetsr  r  ndimrq   s          r   descriptor_loadzTritonSemantic.descriptor_load@  s    $ 9 9:::4##$7|t#Wy6HW%WW#,,W%,HLL//WdFfFfguFv040L0L_0]_{{1doo..r   c                    t        |t        j                        sJ t        |j                        }t        |      |k(  sJ d| dt        |              |j
                  |j                  k(  sJ y )Nr  r  )rc   r.   r  rL  r  r<  )r   r  rH   r  r  s        r   validate_store_likez"TritonSemantic.validate_store_likeK  si    $ 9 9:::4##$7|t#Wy6HW%WW#{{d.....r   c                    | j                  |||       | j                  |d      }| j                  | j                  j	                  |j
                  |j
                  |      t        j                        S NFr  )r  r  r&   r'   create_descriptor_storer   r.   void)r   r  rH   r  s       r   descriptor_storezTritonSemantic.descriptor_storeQ  s]      ug6,,W%,H{{4<<??U\\[bcegelelmmr   c                    | j                  |||       |j                  t        j                  t        j                  t        j
                  t        j                  t        j                  t        j                  hv sJ d       | j                  |d      }t        j                  j                  }| j                  | j                  j                  ||j                   |j                   |      t        j"                        S NUnsupported dtypeFr  )r  r9   r.   rh   r/   rj   rK   rI   rJ   r  r   DESCRIPTOR_REDUCE_KINDADDr&   r'   create_descriptor_reducer   r"  r   r  rH   r  rG   s        r   descriptor_atomic_addz$TritonSemantic.descriptor_atomic_addV  s      ug6zzbii299bjj"**VXVaVabbwdwwb,,W%,H((,,{{4<<@@t{{TYT`T`bijlnlslsttr   c                ~    t         j                  j                         }|j                  dk(  xr |j                  dk\  S )NcudaZ   )r   activeget_current_targetbackendarch)r   targets     r   _has_native_tmazTritonSemantic._has_native_tma]  s1    113&(>V[[B->?r   c                B   |t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  hv sJ d       |t         j
                  t         j                  hv r| j                         sJ d       y y )Nr&  z-16-bit float types require native tma support)r.   rh   r/   rj   ri   rI   rJ   r4  )r   r9   s     r   $_descriptor_atomic_min_max_supportedz3TritonSemantic._descriptor_atomic_min_max_supporteda  sm    BHHbii2::r{{[[p]pp[RZZ--'')Z+ZZ) .r   c                X   | j                  |||       | j                  |j                         | j                  |d      }t        j
                  j                  }| j                  | j                  j                  ||j                  |j                  |      t        j                        S r   )r  r6  r9   r  r   r'  MINr&   r'   r)  r   r.   r"  r*  s        r   descriptor_atomic_minz$TritonSemantic.descriptor_atomic_minf        ug611$**=,,W%,H((,,{{4<<@@t{{TYT`T`bijlnlslsttr   c                X   | j                  |||       | j                  |j                         | j                  |d      }t        j
                  j                  }| j                  | j                  j                  ||j                  |j                  |      t        j                        S r   )r  r6  r9   r  r   r'  MAXr&   r'   r)  r   r.   r"  r*  s        r   descriptor_atomic_maxz$TritonSemantic.descriptor_atomic_maxm  r:  r   c                   | j                  |||       |j                  t        j                  t        j                  t        j
                  t        j                  hv sJ d       | j                  |d      }t        j                  j                  }| j                  | j                  j                  ||j                  |j                  |      t        j                        S r%  )r  r9   r.   rh   r/   rj   ri   r  r   r'  ANDr&   r'   r)  r   r"  r*  s        r   descriptor_atomic_andz$TritonSemantic.descriptor_atomic_andt        ug6zzbii299bhhGG\I\\G,,W%,H((,,{{4<<@@t{{TYT`T`bijlnlslsttr   c                   | j                  |||       |j                  t        j                  t        j                  t        j
                  t        j                  hv sJ d       | j                  |d      }t        j                  j                  }| j                  | j                  j                  ||j                  |j                  |      t        j                        S r%  )r  r9   r.   rh   r/   rj   ri   r  r   r'  ORr&   r'   r)  r   r"  r*  s        r   descriptor_atomic_orz#TritonSemantic.descriptor_atomic_or{  s      ug6zzbii299bhhGG\I\\G,,W%,H((++{{4<<@@t{{TYT`T`bijlnlslsttr   c                   | j                  |||       |j                  t        j                  t        j                  t        j
                  t        j                  hv sJ d       | j                  |d      }t        j                  j                  }| j                  | j                  j                  ||j                  |j                  |      t        j                        S r%  )r  r9   r.   rh   r/   rj   ri   r  r   r'  XORr&   r'   r)  r   r"  r*  s        r   descriptor_atomic_xorz$TritonSemantic.descriptor_atomic_xor  rA  r   c                   t        |t        j                        sJ |dk(  sJ d       |dk(  sJ d       t        |j                        dk(  sJ d|j                          |j                  d   dk(  sJ d|j                          t        |j
                        dk(  sJ d	|j
                          |j
                  d   d
k\  sJ d|j
                          |j                  }d|j                  z  d
z  }|j                  d   |k\  sJ d| d| d|j                  d           t        j                  |j                  |j
                  d   |j                  d   g      }| j                  |fd      d   }| j                  j                  |j                  |j                  ||j                  | j                              }	| j                  |	|      S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got r4  zdescriptor gather of  must have at least  columns, but got Fr  )rc   r.   r  rL  r  r<  r9   r  r5  r  r'   create_descriptor_gatherr   r   r&   )
r   r  	x_offsetsy_offsetr  r  r9   min_colsrp   rq   s
             r   descriptor_gatherz TritonSemantic.descriptor_gather  s   $ 9 9:::#J%JJ#"$L&LL$ 4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh' 9??#q(\,J9??J[*\\( q!Q&q*_`i`o`o_p(qq&

111A5 	A3E7:NxjXjkok{k{|}k~j  A	A  }}TZZ)//!*<d>N>Nq>Q)RS--xl-NqQLL11$++y?O?OQY[_[e[efjfrfr[st{{1d##r   c                   t        |t        j                        sJ t        |j                        dk(  sJ d|j                          |j                  d   dk(  sJ d|j                          t        |j
                        dk(  sJ d|j                          |j
                  d   dk\  sJ d|j
                          |j                  }d	|j                  z  dz  }|j                  d   |k\  sJ d
| d| d|j                  d           | j                  |fd      d   }| j                  j                  |j                  |j                  |j                  |       | j                  d t        j                        S )Nr   rJ  r   r   rK  rL  rM  z6descriptor scatter must have at least 8 rows, but got r4  zdescriptor scatter of rN  rO  Fr  )rc   r.   r  rL  r  r<  shapaer9   r  r  r'   create_descriptor_scatterr   r&   r"  )r   r  rH   rQ  rR  r9   rS  s          r   descriptor_scatterz!TritonSemantic.descriptor_scatter  s   $ 9 9::: 4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh' 9??#q(],J9K[K[J\*]]( q!Q&r*`ajapap`q(rr&

111A5 	B4UG;OPXzYklpl|l|}~l  lA  B	B  --xl-NqQ..t{{ELL)JZJZ\de{{4))r   c           	        |t        d      |j                  j                  j                         }|j                  j	                         s| j                  ||      }|j                  j	                         sJ d       ||j                  j                         k(  s&J d| d|j                  j                          d       |j                  j                  j                  |j                  j                  k(  s@J d|j                  j                  j                   d|j                  j                   d       |j                  j                  j                  }|t        j                  k7  sJ d       | j                  ||      }| j                  ||      }| j                  | j                  j                  |j                  |j                  |||      t        j                        S )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r,   rp   r  rx  rK  r~  r.   rf   r  r   r&   r'   create_tensor_pointer_storer   r"  )	r   r   valr  r  r  r  r  r  s	            r   _store_block_pointerz#TritonSemantic._store_block_pointer  s    jkk hh))::<xx  "++C=Cxx  "S$SS"chh77 
 
 	a+&89R9R9T8UU_`	a 
xx""--1D1DD  	uH[\_\d\d\o\o\z\z[{  |U  VY  V^  V^  Vi  Vi  Uj  jt  Gu  	uD$$// X"XX  ::>;W iiV$ {{LL44SZZ^]bdlmoqovovx 	xr   c           	     (   |j                   j                  j                         s't        d|j                   j	                          d      |rt        d      |j                   j                         sL|j                   j                         rt        d      |r%|j                   j                         rt        d      |j                   j                         rV| j                  ||j                   j                               }|*| j                  ||j                   j                               }|j                   j                  }|j                  }|t        j                  k(  rBt        j                  }t        j                  ||j                        }| j                  ||      }| j                  ||      }|P| j                  | j                   j#                  |j$                  |j$                  ||      t        j&                        S |j                   j                  j)                         st        d      | j                  | j                   j+                  |j$                  |j$                  |j$                  ||      t        j&                        S )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  "Mask must have boolean scalar type)rp   r~   rw   r,   r   rK  r~  rx  r  r.   rf   r  r	  r
  r   r&   r'   create_storer   r"  r  create_masked_store)	r   r   r[  r  r  r  r  r  r  s	            r   _store_legacyzTritonSemantic._store_legacy  s   xx%%'4SXX5F5F5H4IXYY  E F F
 xx  "xx  " !ijj		**, !hii 88++C1J1J1LMC00sxx7P7P7RS"" RWWWWF__VV-A-ABF))C(C iiV$ <;;t||88SZZQVX`acecjcjkkyy'')ABB{{4<<;;CJJ

TXT_T_afhpq77$ 	$r   c                   | j                  |      }| j                  |      }|j                  j                         s$|j                  j                  j                         rt        d      |j                  j                         r:|j                  j                  j                         r| j                  ||||||      S | j                  ||||||      S )N"Cannot store to a constant pointer)r  r  rp   is_constr~   r,   rw   r  rK  r\  ra  )	r   r   r[  r  r  r  r  r  r  s	            r   storezTritonSemantic.store  s     11.A//@88#((//":":"<ABB88??!4!4!=!=!?,,S#t^UT\]] %%c3neXVVr   c           	     f   | j                  |      }| j                  |      }|j                  j                  j                  }|j
                  dvrt        d      | j                  | j                  j                  |j                  |j                  |j                  ||      |j                        S )N)   r4  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rp   r~   r  r  r,   r&   r'   create_atomic_casr   )r   r   cmpr[  r  r  r  s          r   
atomic_caszTritonSemantic.atomic_cas  s    s#""5)XX__//
((<XYY{{4<<99#**cjjRUR\R\^achiknksksttr   c                `   |j                   j                  j                         s&t        d|j                   j	                         z         |j                   j                         s$|j                   j                  j                         rt        d      |j                   j                  j                  }|t        j                  u r|dk7  rt        d|z   dz         |t        j                  u r|dk7  rt        d|z   dz         |t        j                  t        j                  fv s|j                  dk  rt        d|z   dz   t        |      z         |j                   j                         rX|*| j                  ||j                   j!                               }|*| j                  ||j                   j!                               }| j#                  ||j                   j                  j                        }|| j$                  j'                  d	      }t        j(                  }|j                   j                         r^|j                   j+                  t        j(                        }| j$                  j-                  |j/                  | j$                        |      }| j1                  ||      }|||fS )
Nz)Pointer argument of store instruction is rc  r   atomic_z does not support fp16z does not support bf16rg  z does not support T)rp   r~   rw   r,   r   rd  r  r.   rI   rJ   int16uint16r  r  rK  r~  rx  r   r'   re   rf   r   rM  r   r&   )r   r   r[  r  opr  mask_irmask_tys           r   atom_red_typechecking_implz)TritonSemantic.atom_red_typechecking_impl  s   xx%%'H388K\K\K^^__88#(("5"5">">"@ABBXX__//
#eY^.FFGG$uY^.FFGG"((BII..*2O2ORT2TY^.BBS_TUU8800sxx7P7P7RS//SXX5N5N5PQiiSXX__778<ll++D1GggGxx  "((22277;,,33GMM$,,4OQXY;;w0DC~r   c                    |j                   j                  }t        j                  |d      }| j	                  ||      }| j                  ||dz
        }| j                  |t        j                        S )NF)r  signedr   )r9   r  r.   get_int_dtyper   r  r   rf   )r   rq   r  idtypeixsignbits         r   _signbitzTritonSemantic._signbit6  s\    77--!!8EB\\!V$))B1-yy"''**r   c                <   | j                  |||d      \  }}}| j                  |      }| j                  |      }|j                  j                  }|j                         r|j                         rp| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S | j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S |t        j                   t        j"                  hvrt%        d|       |t        j                   k(  rt        j&                  nt        j(                  }| j+                  ||      }| j+                  |t        j,                  |d            }	|t        j                   k(  rt        j.                  nt        j0                  }
| j+                  ||
      }| j+                  |t        j,                  |
d            }| j3                  |      }| j5                  |      }| j                  | j                  j                  t        j                  j                  |	j                  |j                  | j7                  ||      j                  ||      |j                        }| j                  | j                  j                  t        j                  j8                  |j                  |j                  | j7                  ||      j                  ||      |j                        }| j;                  |||      }| j+                  ||      S )Nr   z#atomic_max not supported for dtype r   )rr  r  r  rp   r~   rR   r   r&   r'   create_atomic_rmwr   	ATOMIC_OPr<  r   UMAXr.   rK   rM   r<   r/   ri   r   r	  rh   rj   ry  r   r   UMINwherer   r   r[  r  r  r  sca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retr   s                     r   
atomic_maxzTritonSemantic.atomic_max=     88c4OS$s#""5)==?##%{{LL222<<3C3CSZZQTQ[Q[]a]h]hjmotuHH  {{LL222<<3D3DcjjRUR\R\^b^i^iknpuvHH  "**bjj11A&JKK#rzz1rxxS&)S"//&!"<=%3"))c7+c2??7A#>?mmC iin++LL**2<<+;+;U\\5<<+/99T3+?+F+FUTUZU_U_a ++LL**2<<+<+<fmmV]]+/99T3+?+F+FUTU[U`U`b jjgw/||C((r   c                <   | j                  |||d      \  }}}| j                  |      }| j                  |      }|j                  j                  }|j                         r|j                         rp| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S | j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S |t        j                   t        j"                  hvrt%        d|       |t        j                   k(  rt        j&                  nt        j(                  }| j+                  ||      }| j+                  |t        j,                  |d            }	|t        j                   k(  rt        j.                  nt        j0                  }
| j+                  ||
      }| j+                  |t        j,                  |
d            }| j3                  |      }| j5                  |      }| j                  | j                  j                  t        j                  j                  |	j                  |j                  | j7                  ||      j                  ||      |j                        }| j                  | j                  j                  t        j                  j8                  |j                  |j                  | j7                  ||      j                  ||      |j                        }| j;                  |||      }| j+                  ||      S )Nr   z#atomic_min not supported for dtype r   )rr  r  r  rp   r~   rR   r   r&   r'   r{  r   r|  r8  r   r~  r.   rK   rM   r<   r/   ri   r   r	  rh   rj   ry  r   r   r}  r  r  s                     r   
atomic_minzTritonSemantic.atomic_minc  r  r   c           
        | j                  |||d      \  }}}| j                  |      }| j                  |      }|j                  j                  }|j                         rt        j                  j                  nt        j                  j                  }| j                  | j                  j                  ||j                  |j                  |j                  ||      |j                        S )Nr   )rr  r  r  rp   r~   rx   r   r|  FADDr(  r&   r'   r{  r   )r   r   r[  r  r  r  r  ro  s           r   
atomic_addzTritonSemantic.atomic_add  s    88c4OS$s#""5)"("4"4"6R\\BLL<L<L{{4<<99"cjj#**VZVaVacfhmn88% 	%r   c           
     V   | j                  |||d      \  }}}| j                  |      }| j                  |      }| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S )Nand)rr  r  r  r&   r'   r{  r   r|  r?  r   rp   r   r   r[  r  r  r  s         r   
atomic_andzTritonSemantic.atomic_and      88c4OS$s#""5){{LL**2<<+;+;SZZUYU`U`beglmorowowy 	yr   c           
     V   | j                  |||d      \  }}}| j                  |      }| j                  |      }| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S )Nor)rr  r  r  r&   r'   r{  r   r|  rC  r   rp   r  s         r   	atomic_orzTritonSemantic.atomic_or  s    88c4NS$s#""5){{LL**2<<??CJJ

TXT_T_adfklnqnvnvx 	xr   c           
     V   | j                  |||d      \  }}}| j                  |      }| j                  |      }| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S )Nxor)rr  r  r  r&   r'   r{  r   r|  rF  r   rp   r  s         r   
atomic_xorzTritonSemantic.atomic_xor  r  r   c           
     V   | j                  |||d      \  }}}| j                  |      }| j                  |      }| j                  | j                  j                  t        j                  j                  |j                  |j                  |j                  ||      |j                        S )Nxchg)rr  r  r  r&   r'   r{  r   r|  XCHGr   rp   r  s         r   atomic_xchgzTritonSemantic.atomic_xchg  s    88c4PS$s#""5){{LL**2<<+<+<cjj#**VZVaVacfhmnHH 	r   c                   |j                         | j                  j                  j                  v s+J d| j                  j                  j                   d|        |j	                         }|dk(  rd}t        t        j                  |      S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr'   r   allowed_dot_input_precisionsupperrA  r   INPUT_PRECISION)r   input_precisions     r   _str_to_dot_input_precisionz*TritonSemantic._str_to_dot_input_precision  s    $$&$,,*>*>*[*[[ 	y-dll.B.B._._-``fgvfwx	y[)//1h&&Or))?;;r   c           
        |j                   j                         r|j                   j                         sJ |j                  j                         r|j                  j                         rn|j                  t        j
                  t        j                  t        j                  t        j                  t        j                  fv sJ d|j                          |j                  t        j
                  t        j                  t        j                  t        j                  t        j                  fv sJ d|j                          |j                  |j                  k(  s!J d|j                   d|j                          |j                  j                         s|j                  j                         rwd| j                  j                  j                  v rt        j                  d       | j!                  |t        j                        }| j!                  |t        j                        }| | j                  j                  j"                  }| j%                  |      }t'        |j(                        }t'        |j(                        }||cxk(  rdk(  s1n ||cxk(  rdk(  s$n J d	|j(                   d
|j(                   d       |j(                  d   j*                  |j(                  d   j*                  k(  sVJ d|j(                   d|j(                   d|j(                  d   j*                   d|j(                  d   j*                   d	       | j                  j,                  j/                  d      	 J d        | j                  j,                  d   |j                   |j                         }	|j(                  d   j*                  |	d   k\  r>|j(                  d   j*                  |	d   k\  r|j(                  d   j*                  |	d   k\  sJ d|	d    d|	d    d|	d           |j                   j0                  j3                         rZ|j                   j0                  t        j
                  k(  sJ d       | j                  j5                  d      }
t        j6                  }n|j9                         rt;        d      |j                   j0                  j=                         s$|j                   j0                  j9                         r,| j                  j?                  d      }
t        j                  }nH|jA                         r| j                  jC                  d      n| j                  j?                  d      }
|}|j                   j(                  d   }|j                   j(                  d   }|j                   j(                  d   }|dk(  r|j                   j(                  d   nd }t	        jD                  ||r|||gn||g      }|6| j                  jG                  |jI                  | j                        |
      }n|jJ                  }|j                   |k(  sJ |X|j                  j                         r;|j                  j                         r!| j                  j                  jL                  }nNd}nK|j                  j                         r1|j                  j                         r||kD  rt;        d| d| d      | jO                  | j                  jQ                  |jJ                  |jJ                  |||      |      S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   fp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releaser      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r  r^  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ())rp   rK  r9   rQ   r.   r  uint8rI   rJ   rK   r  r'   r   !deprecated_fp8_dot_operand_dtypeswarningswarnr   default_dot_input_precisionr  rL  r<  rH   r  r  r~   rR   	get_int32r/   rP   r,   rN   get_fp32rO   get_fp16r5  rM  r   r   max_num_imprecise_acc_defaultr&   
create_dot)r   r   r   accr  max_num_imprecise_acc	out_dtypelhs_rankrhs_rankr  r  ret_scalar_tyMNKBr   
acc_handles                     r   dotzTritonSemantic.dot  sh   xx  "sxx'8'8':::99#))"2"2"499"((BJJ!#!- - S0Fsyyk.RS -99"((BJJ!#!- - S0Fsyyk.RS -99		)o-STWT]T]S^^cdgdmdmcn+oo)99  "cii&;&;&=T\\11SSS m ))C,C))C,C""ll22NNO::?Ksyy>syy>8(q(H,EA,E  	VItuxu~u~t  @I  JM  JS  JS  IT  TU  HV  	VEyy}""cii' 	u,SYYK7PQTQZQZP[  \Y  Z]  Zc  Zc  df  Zg  Zm  Zm  Yn  n^  _b  _h  _h  ik  _l  _r  _r  ^s  st  u	u ||''++#'( 	^)]	^ (?t||//?#((Syy}""l1o5#))B-:M:MQ]^_Q`:`		"##|A6	v0a0AVWHYYcdpqrdsctu	v 7 88??!!#88??bgg-E/EE-''*BHHM z  XX__$$&#((//*A*A*C&&q)BJJM-6->->-@&&q)dllF[F[\]F^B%MHHNN2HHNN2HHNN2!)QCHHNN1D}1q!Qi1a&I;226<<3MrRJJ88v%%% !(yy!cii&6&6&8(,(<(<(Z(Z%()%yy!cii&6&6&8=RUV=V #:;P:QQabcadde!fgg{{LL##CJJ

JYnoqwy 	yr   c                z    t        t        j                  |j                         d       }|t	        d| d      |S )NzInvalid float format: r^   )rA  r   ScaleDotElemTypeTYr  r,   )r   float_formatty_enums      r   _str_to_fp_typezTritonSemantic._str_to_fp_type	  s>    "//1C1C1EtL?5l^1EFFr   c                <   t         j                  t         j                  t         j                  t         j                  dj                  |      }|B|dk(  s
J d|        |j                  t         j                  k(  sJ d|j                          |S |j                  |k(  r|S t         j                  t         j                  t         j                  t         j                  d|   }|j                  |k(  sJ d| d|j                          | j                  ||      S )z
        If float_format is subbyte, make sure it's packed as uint8 and return it.
        Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
        )e5m2e4m3bf16fp16e2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r.   float8e5
float8e4nvrJ   rI   r  r9   r  rn  r   )r   r[  r  	triton_tyunsigned_tys        r   _bitcast_to_fp_typez"TritonSemantic._bitcast_to_fp_type  s    
  [["--ZZ!!$\!2 	6)e-VWcVd+ee)99(a,UVYV_V_U`*aa(J99	!J#%88RXXryyZ\ZcZcdeqrK99+d/D\NRXY\YbYbXc-dd+<<Y//r   c                ~   |j                   j                         r|j                   j                         sJ t        |j                        }t        |j                        }||cxk(  rdk(  s1n ||cxk(  rdk(  s$n J d|j                   d|j                   d       |j                  }|j                  }| j                  |      }| j                  |      }h d}||v s
J d|        ||v s
J d|        |d u xs* t        |t        j                        xr |j                  d u }|d u xs* t        |t        j                        xr |j                  d u }| j                  ||      }| j                  ||      }|	s|d	k(  sJ d
       |
s|d	k(  sJ d
       |j                   j                  dd  \  }}|j                   j                  dd  \  }}|d	k(  rdnd}|d	k(  rdnd}|	r||z  n|}|
r||z  n|}||k(  s"J d|j                   d|j                   d       |dk(  r|j                   j                  d   nd }|	s||z  }|
s||z  }t        j                  ||r|||gn||g      }| j                  j                  d      }|6| j                  j                  |j                  | j                        |      }n|j                  }|j                   |k(  sJ |rd n|j                  }|rd n|j                  } | j!                  | j                  j#                  |j                  | ||j                  ||||	|
|
      |      S )Nr   r  r  r  r  >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  zBonly mxfp4 inputs can be packed along a dimension different than Kr  r   zCReduction dimension should pack the same number of elements; (lhs: r   )rp   rK  rL  r<  rH   r  rc   r.   rn   r  r5  r'   r  rM  r   r   r&   create_dot_scaled)!r   r   	lhs_scale
lhs_formatr   	rhs_scale
rhs_formatr  	fast_math
lhs_k_pack
rhs_k_packr  r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  K_LHSK_RHSr  PACKED_APACKED_BPACKED_A_DIMPACKED_B_DIMr  r   r  r  rhs_scale_handlelhs_scale_handles!                                    r   
dot_scaledzTritonSemantic.dot_scaled!  s    xx  "sxx'8'8':::syy>syy>8(q(H,EA,E  	VItuxu~u~t  @I  JM  JS  JS  IT  TU  HV  	VE$**
$**
..z:..z:B_,M0@.MM,_,M0@.MM,%-r*Y2U2qZcZiZimqZq%-r*Y2U2qZcZiZimqZq&&sJ7&&sJ7Z61w3ww1Z61w3ww188>>"#&588>>"#&q"f,1!"f,1!+5x%'5+5x%'5|+  	T/rsvs|s|r}  ~G  HK  HQ  HQ  GR  RS  .T  	T+!)QCHHNN1DHAHAyq1a)q!fE\\""1%;226<<3MrRJJ88v%%%#44):J:J#44):J:J{{LL**3::7GZ]ZdZdfv+:IzS]_iklrt 	tr   c                P   |j                   t        j                  k7  r"t        j                  d|j                           | j                  |t        j                        }| j                  ||dd      \  }}|j                  j                         r+| j                  ||      \  }}| j                  ||      \  }}n| j                  ||      \  }}|j                  }| j                  | j                  j                  |j                  |j                  |j                        |      S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r9   r.   rf   r  r  r   r   rp   rK  r   r&   r'   create_selectr   )r   	conditionrq   r   r  r   s         r   r  zTritonSemantic.whereU  s   ??bgg%MMy  {D  {J  {J  zK  L IIi1	00AtTB1>>""$44YBLIq,,Q2DAq44YBLIq{{4<<55i6F6FRSRZRZ[]cddr   c                \    |rt        j                  ||      }n|}| j                  ||      S r)   )r.   r5  r&   )r   rq   rV   r  res_tys        r   wrap_tensorzTritonSemantic.wrap_tensori  s-    ]]9i8F F{{1f%%r   c                &   	
 |t         fdD              d}d   j                  j                  
t        
      }||k  sJ d| d       t	        
      D cg c]  \  }}||k7  s| c}}	t        
fdD              sJ d        j                  j                  D cg c]  }|j                   c}|       |       j                         sJ t        	 fdt        t                    D              S c c}}w c c}w )Nc              3  p   K   | ]-  }j                  ||j                  j                  gd        / yw)Tr_  N)rS  rE  rH   )rn  tr   s     r   rp  z+TritonSemantic.reduction.<locals>.<genexpr>s  s+     ^RS4<<AGGMM?<M^s   36r   z&reduction axis must be < inputs rank (r  c              3  P   K   | ]  }|j                   j                  k(    y wr)   )rp   r<  )rn  r  r<  s     r   rp  z+TritonSemantic.reduction.<locals>.<genexpr>z  s     9Q166<<5(9s   #&z-all reduction inputs must have the same shapec              3     K   | ]=  }j                  j                  |      |   j                  j                         ? y wr)   r  
get_resultrp   r~   )rn  r|  inputs	reduce_opr  r   s     r   rp  z+TritonSemantic.reduction.<locals>.<genexpr>  s@      u\]DY11!4fQinn6K6KYWu   AA)tuplerp   r<  rL  ry  allr'   create_reducer   verifyr;  )r   r  r1   region_builder_fnrankr|  rR  r  r  r  r<  s   ``      @@@r   	reductionzTritonSemantic.reductionq  s   <^W]^^FDq	$$5zd{LDTF!LL{#,U#3A41aqDyQA	9&99j;jj9LL..&/IQ/I4P	)$!!! uafgjkqgrasu u 	u B 0Js    D.D*Dc                    d   j                   j                  t              }| |cxk  r|k  sn J d| d| d       |dk  r||z  }D ]"  }|j                   j                  k(  rJ d         j                  j	                  D cg c]  }|j
                   c}||       |       j                         sJ t         fdt        t                    D              S c c}w )Nr   z
scan axis z must be < inputs rank (r  z(all scan inputs must have the same shapec              3     K   | ]=  }j                  j                  |      |   j                  j                         ? y wr)   r  )rn  r|  r  scan_opr   r<  s     r   rp  z2TritonSemantic.associative_scan.<locals>.<genexpr>  s;     w_`T%%g&8&8&;VAY^^=R=RTYZwr  )	rp   r<  rL  r'   create_scanr   r  r   r;  )	r   r  r1   r  reverser  r  r	  r<  s	   ``     @@r   associative_scanzTritonSemantic.associative_scan  s    q	$$5zu#t#Wz$7OPTvUV%WW#!8DLD 	UA66<<5(T*TT(	U ,,**f+EAHH+EtWU'"~~wdijmntjudvwww	 ,Fs   C.c                   |j                   j                         sJ d       t        |j                  j                        }t        |j                  j                        |k(  sJ d       | |cxk  r|k  sn J d| d| d       |dk  r||z  }t        |      D ]F  }||k(  r	|j                  j                  |   |j                  j                  |   k(  r=J d| d        | j                  j                  |j                  |j                  |      }| j                  ||j                  j                  |j                  j                        S )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r  r   z
index dim z( must match the corresponding source dim)r9   rR   rL  rp   r<  r;  r'   create_gatherr   r  r~   )r   srcindexr1   r  ro  gathers          r   r  zTritonSemantic.gather  s1   {{!!#F%FF#388>>"5::##$,`.``,u#t#Y|D69QRVQWWX%YY#!8DLDt 	yADy::##A&#((..*;;xz$Ow=xx;	y
 ++CJJdK9I9IJJr   c                   t        |j                        dk(  sJ d       |j                  j                         sJ d       |W| j	                  ||j                        }|j
                  j                  j                         st        d      |j                  }| j                  | j                  j                  |j                  ||      t        j                  t        j                  |g            S )Nr   z histogram only supports 1D inputz%histogram only supports integer inputr^  )rL  r<  r9   rR   r~  rp   r~   r  r,   r   r&   r'   create_histogramr.   r5  r/   )r   r   num_binsr  s       r   	histogramzTritonSemantic.histogram  s    5;;1$H&HH${{!!#L%LL#,,T5;;?D99##++- !EFF;;D{{4<<88xQUV==H:>@ 	@r   c                   t        dt        |j                              t        |      k7  rt        d      |j                  j                  dt        j                  ||j                  j                                      |S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   rL  r<  r,   r   set_attrr   	make_attrget_contextr   rq   r  s      r   multiple_ofzTritonSemantic.multiple_of  s[    q#agg,3v;.`aa	+R\\&!((BVBVBX-YZr   c                    t        |j                        t        |      k7  rt        d      |j                  j	                  dt        j                  ||j                  j                                      |S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityrL  r<  r,   r   r  r   r  r  r  s      r   max_contiguouszTritonSemantic.max_contiguous  sS    qww<3v;&cdd	/2<<@T@T@V+WXr   c                    t        |j                        t        |      k7  rt        d      |j                  j	                  dt        j                  ||j                  j                                      |S )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s      r   max_constancyzTritonSemantic.max_constancy  sS    qww<3v;&bcc	.",,vqxx?S?S?U*VWr   c                r    | j                  | j                  j                         t        j                        S r)   )r&   r'   create_barrierr.   r"  )r   s    r   debug_barrierzTritonSemantic.debug_barrier  s$    {{4<<668"''BBr   c                   |j                  d      s|r|dz  }|j                  d      s
|r|d d dz   }t        |      dkD  r|j                  d      sd|z   }|D cg c]  }|j                   }}|D cg c]  }|j                  j                          }}| j                  | j                  j                  ||||      t        j                        S c c}w c c}w )N rw  r^  r   )endswithrL  
startswithr   r9   r   r&   r'   create_printr.   r"  )r   prefixargshexargnew_args	is_signeds          r   device_printzTritonSemantic.device_print  s     s#cMFt$CR[4'Fv;?6#4#4S#96\F*./3CJJ//:>?3SYY,,.?	?{{4<<44VS(IVXZX_X_`` 0?s   C5!Cc                    | j                   j                  j                  sy | j                  | j                   j	                  |j
                  |      t        j                        S r)   )r'   r   debugr&   create_assertr   r.   r"  )r   r   r   s      r   r   zTritonSemantic.device_assert  sB    ||##)){{4<<55dkk3GQQr   c                    | j                  | j                  j                  |j                        t        j
                        S r)   )r&   r'   create_assumer   r.   r"  )r   r   s     r   assumezTritonSemantic.assume  s*    {{4<<55dkkBBGGLLr   c                r   t        |t              rt        j                  |      }t        |t        j                        rt        |j                  t
              r%| j                  j                  |j                        S |rQd|j                  cxk  rdk  sn J d|j                   d       | j                  j                  |j                        S d|j                  cxk  rdk  sn J d|j                   d       | j                  j                  |j                        S t        |t        j                        r|j                  j                  dk(  sJ d	       |j                  j                         sJ d
       |j                  t        j                  k7  rY|rW| j                  j                  |j                   | j                  j#                         |j                  j%                               S |j                  t        j&                  k7  r	|sJ d       |j                   S J dt)        |              )Nr\   r]   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerZ   r[   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rc   rg   r.   rn   rH   rd   r'   re   r  r  r&   rE  r9   rR   ri   r   r   get_int64_tyr   r/   rp   )r   r  r  s      r   _convert_elem_to_ir_valuez(TritonSemantic._convert_elem_to_ir_value  s   dC <<%DdBLL)$**d+||,,TZZ883e3 J 8##'::,.H6J J3||--djj993e3 J 8##'::,.H6J J3||--djj99bii(::##q(V*VV(::$$&b(bb&zzRXX%+||33DKKAZAZA\48JJ4L4L4NP Prxx'W W Wu;;XKDQUJ<XXur   c                    t        |d      r |D cg c]  }| j                  ||       c}S | j                  ||      gS c c}w )Nr  )r  r8  )r   	list_liker  r  s       r   r  z$TritonSemantic._convert_to_ir_values  sF    9j)R[\$D224E\\..y+FGG ]s   A c           	        | j                  |      }| j                  |      }| j                  |d      }|j                  j                         r$|j                  j                  j	                         rt        d      |j                  j                  t        j                  k(  rH| j                  |t        j                  t        j                  |j                  j                              }t        d      sgD cg c]*  }t        |t        j                        r|j                  n|, c}t!        d D              sJ d       t        |d      s|g}|D cg c]*  }t        |t        j                        r|j                  n|, }}t#        |      t%        t'        t)        |                  k(  sJ d       t!        fd||||fD              sJ d	       | j*                  j-                  |j.                  ||||      }| j1                  |t        j                  t        j2                  |j                  j                                    S c c}w c c}w )
NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c              3  `   K   | ]&  }t        |t              xr d |cxk  xr dk  nc  ( yw)rZ   r[   N)rc   rg   )rn  r  s     r   rp  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>  s)     \:dC(CVt-Ce-CC\s   ,.zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  L   K   | ]  }t              t        |      k(    y wr)   )rL  )rn  r:  r  s     r   rp  z0TritonSemantic.make_block_ptr.<locals>.<genexpr>'  s     h)3{#s9~5hs   !$zBExpected shape/strides/offsets/block_shape to have the same length)r  rp   rw   r  rK  r,   r.   rf   r   r	  r  r
  r  rc   rn   rH   r  rq  rr  r;  rL  r'   create_make_block_ptrr   r&   r5  )	r   baser<  stridesr  r  orderr  r   s	        `   r   make_block_ptrzTritonSemantic.make_block_ptr
  s    **51,,W5,,W%,H yy!TYY%9%9%B%B%Dlmm 99277*99T2??277DII<S<S#TUD {J/&-KZefRVZbll%CtzzMf\P[\\ 	VU	V\ uj)GETYZDz$=4GZZe}U3u:%6 77w9ww7 hwX_afGghh 	QP	Qh 33DKKQXZeglm{{62??2==AUAUWb3c#dee% g [s   /I /Ic                    | j                  |d      }| j                  | j                  j                  |j                  |      |j
                        S r   )r  r&   r'   create_advancer   rp   )r   r?  r  s      r   advancezTritonSemantic.advance0  sC    ,,W%,H {{4<<66t{{GLdiiXXr   c           	     .   t        |      }d|cxk  rdk  sn t        d| d      t        |      |k7  rt        d| dt        |             t        |      |k7  rt        d| dt        |             t        |j                  t        j
                        sJ |j                  j                  j                  d	z  }t	        j                  |d
         }||z  dk  rt        d| d| d||z   d      t	        j                  |d
         |d
<   |d
   dk7  rt        d|d
          |D cg c]"  }| j                  |t        j                        $ }}|D cg c]"  }| j                  |t        j                        $ }}t	        j                  |      }t        |j                  t        j
                        sJ t	        j                  |j                  j                  |      }	|j                  }
|j                  j                  j!                         }| j"                  j%                  |
|D cg c]  }|j                   c}|D cg c]  }|j                   c}||      }t	        j&                  ||||	      S c c}w c c}w c c}w c c}w )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got rM  r^  rg  zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got )rL  r,   rc   r9   r.   r	  r  r  rU  rF  r/   ri   _unwrap_shaperp   r5  r   r   r'   create_make_tensor_descriptortensor_descriptor)r   r?  r<  r@  r  r  	elem_sizecontig_dim_sizerq   rp   base_handleis_signed_intrR  r   s                 r   make_tensor_descriptorz%TritonSemantic.make_tensor_descriptor7  s    5zTQ?v[QRRw<4y.?G~NOO{t#<TFBVWZ[bWcVdeff$**boo666JJ))<<A	11+b/BY&+detduux  zC  yD  DG  HW  Zc  Hc  Gd  dj  k  --gbk:2;!LWUW[MZ[[8=>1!!!RXX.>>:ABQ4##Arxx0BB &&{3$))R__555}}TYY11;?kk		,,::<;;K\aIbWX!((IbOV<W!QXX<WYdfsu##FE7DAA ?B Jc<Ws   1'J'J6J
J)r1   rg   returnr   )r=   tl.dtyper>   rQ  rP  rQ  )r=   rQ  rS   rd   r>   rQ  rT   rd   rU   rd   rP  rQ  )T)rr   rd   )r   rQ  r   rQ  ry   rd   rP  None)FFTF)r   TensorTy | numbers.Numberr   rS  rP  Tuple[TensorTy, TensorTy])r   r   r   r   r   callable)r   rS  r   rS  r   rd   rP  r   )r   rS  r   rS  rP  r   )r   rS  r   rS  r   rd   rP  r   )rq   r   r   r   r   tl.PropagateNan)rq   r   r   r   r   r   r   rV  )r   r   r   r   rP  rT  )r   r   r   r   rP  r   )r   r   )r   r   rP  r   )r  r   rP  tl.block_type)r7  rg   r8  rg   r   rW  rP  r   )r9   rQ  rP  r   )r<  	List[int]r9   rQ  rP  r   )rH   r   r<  rX  rP  r   )r   r   rP  rX  rQ  rd   rP  r   )r   r   r1   rg   rP  r   )r   r   r   r   rQ  rd   rP  r   )ra  r   rb  r   rP  r   )ra  r   rP  rT  )r   r   rt  
Tuple[int]rP  r   )r   r   r<  rY  rP  r   )r   r   r   r   rP  r   )r  Optional[str])r   r   r  rQ  rP  r   r)   )r   r   r  rQ  r  rZ  rP  r   )r   r   r  Optional[TensorTy]r   r[  r  r   r  r  r  r  r  r  r  rd   rP  r   )r  tl.tensor_descriptor_baser  r  r  r  rP  r   )r  r\  rH   r   rP  rR  )r  r\  rH   r   rP  r   )r  r  r  r  rP  r   )rH   r   rP  r   )r   r   r[  r   r  r[  r  r  r  r  rP  r   )r   r   ri  r   r[  r   r  r  r  r  rP  r   )
r   r   r[  r   r  r   ro  r  rP  z#Tuple[TensorTy, TensorTy, TensorTy])rq   r   rP  r   )r   r   r[  r   r  r   r  r  r  r  rP  r   )r   r   r   r   r  r   r  rZ  r  rg   r  rQ  rP  r   )r  r  )r[  r   r  r  )r   r   r  r   r  r  r   r   r  r[  r  r  r  zTensorTy | Noner  rd   r  rd   r  rd   r  rQ  rP  r   )r  r   rq   r   r   r   rP  r   )r  Sequence[TensorTy]r1   rg   rP  Tuple[TensorTy, ...])r  r]  r1   rg   r  rd   rP  r^  )r  r   r  r   r1   rg   rP  r   )r   r   r  rg   r  r[  rP  r   )rq   r   r  rX  rP  r   )rP  r   )r)  r  r*  List[TensorTy]r+  rd   rP  r   )r   r   r   r  rP  r   )r?  r   rP  r   )
r?  r   r<  r_  r@  r_  r  zList[tl.constexpr]rP  ztl.tensor_descriptor)yr    r!   r"   r.   r&   __annotations__langr   r2   r5   rC   rX   ro   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r   r   r  r  r   r%  r   r-  r2  r>  rk   rF  rI  rH  rS  rX  r\  rf  rk  ru  r~  r   r  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r#  r+  r4  r6  r9  r=  r@  rD  rG  rT  rX  r\  ra  re  rj  rr  ry  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r#  r/  r   r5  r8  r  rB  rE  rO  r
  r   r   r%   r%      s   YYFN&DO
QD05*.053;05d#R	@ ae05#:S#J&$>#>(0>>8#8(08"8#8(08]2>,8.9"9"	a\[\'&"
]]\)$/8888	8	8 GK Z$	)2AhfXV0

TW 2ptj$i9^		
,:xn n25nHKnZ^nckn	/),	/19	//n
u@[
uuuuu$0**x8*$XW"W'/W(u'*/R8+$)L$)L%yxy<Ly#&Ly3;Ly@HLy\0$.t0.t>A.tHW.tdh.t#.t15.tBJ.tOW.the(&u,x"&x+?x.K.	@CaR
MY4H
$fLY'B'B 'B  	'B
 ('B 
'Br   r%   )
__future__r   r  typingr   r   r   r   r   r	   r
   r|   triton.runtimer   _C.libtritonr   rI  r   r.   r   r   	Exceptionr   r%   r
  r   r   <module>rg     sV    "  J J J  !  CL:F	 FEBWX& EBr   