
    rhF                     V   d dl mZmZmZ  e       r
ddlZddlmZ  e       rddlmZ ddlZ ej                  e
      Zg dZd Zd Zej                  d	d
dej                   dedej$                  fdZ G d dej(                        Zd Zd Zd Zd Zd Z	 	 	 	 	 ddZ	 	 	 	 ddZy)   )is_accelerate_availableis_torch_availablelogging    N)nn)init_empty_weights)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c                     ddl m}  || j                  t        j                        t        j
                  d      \  } }t        | |      \  } }| |fS )Nr   )downcast_to_mxfp   )axis)$triton_kernels.numerics_details.mxfpr
   totorchbfloat16uint8swizzle_mxfp4)wr
   w_scales      r/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/integrations/mxfp4.pyquantize_to_mxfp4r   3   sB    E!!$$u~~"6!LJAwq'*JAwg:    c                     ddl m}m}m} ddlm} ddlm} |j                  d      \  }} | || |      |fi |}  | ||      |      }| |fS )Nr   )FP4convert_layoutwrap_torch_tensor)layout)StridedLayoutr   )mx_axisdtype)	triton_kernels.tensorr   r   r   triton_kernels.tensor_detailsr   $triton_kernels.tensor_details.layoutr   "make_default_matmul_mxfp4_w_layout)	r   r   r   r   r   r   r   value_layoutvalue_layout_optss	            r   r   r   ;   s`    LL4B&,&O&OXY&O&Z#L#(#6ZHYZA .w7GGg:r   i   )r    rows_per_chunkr    r'   returnc                N   dd l }| j                  s>t        j                  j	                         r | j                         } |j                         }|j                  t        j                        dz
  }| j                  d d |j                  k(  s!J d| j                  d|j                         t        j                  t        || j                        }| j                  ^ }}}|j                  |      |z  }	| j                  |	|      } |j                  |	d      }t        j                  |	|dz  || j                        }
t        d|	|      D ]  }t        ||z   |	      }| || }||| }|d	z  j                  t        j                         }|d
z	  j                  t        j                         }|
|| }||   |d d dd df<   ||   |d d dd df<   t        j"                  |||       ~~~~~   |
j                  g |||dz   j$                  g |||z  dz   }
~ ~~|
S )Nr      zblocks.shape=z does not match scales.shape=)r    devicer   r         )out)mathis_cudar   cudais_availabler   int32shapetensor
FP4_VALUESr,   prodreshapeemptyrangeminlongldexpview)blocksscalesr    r'   r0   lutprefix_shapeGB
rows_totalr/   r0r1blkexpidx_loidx_hisubs                     r   convert_moe_packed_tensorsrN   T   s     >>ejj557YYu{{#c)F<<,_?]PVP\P\.__,
,,zv}}
EC ,,\1a<(1,J^^J*F^^J*F
++j!a%uV]]
KCAz>2 *n$j1RmRm *,(uzz*"Rj6{Aqt!tG6{Aqt!tGC#&FCc*" 4+#++
.|
.Q
.A
.
3
3
M\
M1q519
MC 	Jr   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )Mxfp4GptOssExpertsc           	      *   t         |           |j                  | _        |j                  | _        |j
                  | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  dt        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  t        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  dft        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  t        j                        d      | _        d| _        d | _        d | _        y )Nr          r   Frequires_gradgZd;?)super__init__num_local_expertsnum_expertsintermediate_sizehidden_sizer   	Parameterr   zerosr   gate_up_proj_blocksgate_up_proj_scalesfloat32gate_up_proj_biasdown_proj_blocksdown_proj_scalesdown_proj_biasalphagate_up_proj_precision_configdown_proj_precision_config)selfconfig	__class__s     r   rW   zMxfp4GptOssExperts.__init__   s   !33!'!9!9!--#%<<KK((!d.D.D*DdFVFVZ\F\^`hmhshst$
  $&<<KK((!d.D.D*DdFVFVZ\F\didodop$
  "$KK((!d.D.D*DEMMZjo"
 !#KK))4+;+;T=S=SWY=Y[]^fkfqfqr!
 !#KK(($*:*:D<R<RVX<X`e`k`kl!
 !llKK(($*:*:%--P`e
 
-1**.'r   hidden_statesr(   c                    ddl m}m}m} ddlm} t        j                  j                  |j                        5   | |d|d      | j                  d fd      }	 ||| j                  | j                  j                  t        j                        ||| j                  d |	      }
 ||
| j                  | j                   j                  t        j                        ||| j"                  |j$                        }d d d        |S # 1 sw Y   S xY w)	Nr   )FnSpecsFusedActivation
matmul_ogs)	swiglu_fnswiglu)re   limitr   )gather_indxprecision_configgammasfused_activation)scatter_indxrt   ru   )triton_kernels.matmul_ogsrm   rn   ro   triton_kernels.swiglurp   r   r2   r,   re   gate_up_projra   r   r`   rf   	down_projrd   rg   	gate_scal)rh   rk   routing_data
gather_idxscatter_idxrm   rn   ro   rp   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forward   s    RR3ZZ}334 	!'(I?Q"RUYU_U_aeTfhijC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--#	0 #"1	0 #"s   B>DD)__name__
__module____qualname__rW   r   Tensorr   __classcell__)rj   s   @r   rP   rP      s'    !/F#U\\ #]b]i]i #r   rP   c                    dd l }ddlm}m}m}m} t        j                  j                  | j                        5  t        j                  j                         }t        |j                  j                  dd            }d}	| j                  d   }
| j                  d   }||z  }||z  }|dz   |z  }|
|z  }d } || |      \  }}t        j                  |d      }t        j                   |d      \  }}t        j"                  |d|      }|j%                  d      }t        j&                  |||dz
        || }|j)                  d      j+                  t        j,                        }d	}t        j.                  ||k  ||      }t        j0                  |d
      j+                  t        j,                        }t        j0                  |      j+                  t        j,                        }t        j.                  ||k  ||	      }t        j.                  ||k  ||	      }t        j.                  ||	k(  |	|      }||   }t        j.                  ||   |	k(  |	|      } ||j                         |j                               } ||j                         |j                               } ||||      }|}d d d         |      fS # 1 sw Y   xY w)Nr   )
GatherIndxRoutingDataScatterIndxcompute_expt_data_torch
LOCAL_RANKr+   r   c                     t        j                  |  dd      d d d |f   }|j                         }t        j                  | |d      }||j	                         fS )Nr   T)dimstabler   )r   argsortr=   take_along_dimint)valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk   sS    mmTEq>q"1"uEGllnG))$Q?F7;;=((r   r   )binsmaxi  T)r   )src_indxdst_indx)ostriton_kernels.routingr   r   r   r   r   r2   r,   distributedget_world_sizer   environgetr5   softmaxsortgatherr9   histcr?   r   r4   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr|   rs   rw   	expt_datahitted_expertss                                r   routing_torch_distr      s~    dd			6==	) 3%&&557
2::>>,23<<?ll1o%3!O3 1H7,	)  $FK8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;K!OLM_`pqNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/;O	$g3%h y$SU`bnnni3% 3%s   I9K

Kc                 (   dd l m} |j                         r|j                         rt        }nddlm} |}|j                  d   }|j                  d| j                  j                        }t        j                  j                  || j                  j                  | j                  j                        } ||| j                  j                         \  }}}| j#                  ||||      }	|	j                  |d| j                  j                        }	|	|fS )Nr   )routingr+   )torch.distributedr   r3   is_initializedr   r   r   r5   r9   router
hidden_dimr   
functionallinearweightbiastop_kexperts)
rh   rk   distr   
batch_sizerouter_logitsr}   r~   r   
routed_outs
             r   mlp_forwardr     s    $t224$2$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M,3M4;;CTCT,U)L*km\:{SJ##JDKK4J4JKJ}$$r   c                 R    dj                  |       t        fd|D              syy)N.c              3      K   | ]6  }t        j                  | d       xs t        j                  |        8 yw)z\.N)rematch).0keycurrent_key_name_strs     r   	<genexpr>z(should_convert_module.<locals>.<genexpr>#  s>      dgC523_rxx3%J^7__s   <?TF)joinany)current_key_namepatternsr   s     @r   should_convert_moduler   !  s0    88$45 ks  r   c                 V   ddl m} |j                  dd       }|j                  dd       }|j                  dd       }	|j                  dd       }
|j                  dd       }|j                  dd       }d	D ]1  }||v s	| ||||||	|
||d
	      }| d}| d}t        | |j	                  dd      d   |       t        | |      sSt        | |      s`t        t        | |      t        | |            }|j                  dd      j                         j                  |      }|dk(  r<t        j                  j                         rt        j                  j                          t        | |t        j                  j!                  |             t#        | |       t#        | |       4 y )Nr   shard_and_distribute_modulemodelempty_paramcasting_dtypeto_contiguousr   device_meshrz   r{   F)	set_param_blocks_scalesr   r   cpu)integrations.tensor_parallelr   r   setattrrsplithasattrrN   getattr	transpose
contiguousr   r   r2   r3   empty_cacher   r\   delattr)module
param_nameparam_valuetarget_devicedq_param_namekwargsr   r   r   r   r   r   r   projblocks_attrscales_attrdequantizeds                    r   
dequantizer   *  s   JJJw%E**]D1KJJ5MJJ5M::fd#D**]D1K- -:&9!!!#
 "F'*K!F'*KFJ--c15a8+Fv{+0L89UW^_egrWst)33Aq9DDFII-X E)ejj.E.E.GJJ**,ehh&8&8&EF,,3-r   c                    ddl m}m}m} ddlm} |j                  dd       }	|j                  dd       }
|j                  dd       }|j                  dd       }|j                  d	d       }|j                  d
d       }dD ]F  }||v s	| ||	||
|||||       n?t        | |j                  dd      d   t        j                  j                  |d             | d}| d}t        | |      }t        | |      }|j                  j                  dk7  s|j                  j                  dk7  s|j                  d      }|dk(  r!|j!                  || j"                  dz  d      }n |j!                  |d| j"                  dz        }t        |d|      dk(  rd}|j%                  |      }|j%                  |      }t        j&                  j                  |      5  t)        |j+                  dd      |j+                  dd            \  }}d d d        |dk(  r5t        j,                  || j.                  | j"                  dz  g      _        n1t        j,                  || j"                  | j.                  g      _        t        | ||       t        | | d | | |                          t3        | |       t3        | |       ~I y # 1 sw Y   xY w)Nr   )FlexCtx
InFlexDataPrecisionConfigr   r   r   r   r   r   r   r   r   r   r   FrT   r   r   metarz   r+   typer   r2   _precision_config)rhs_data)weight_scaleflex_ctx)rx   r   r  r  r   r   r   r   r   r   r   r\   r   r,   r  sizer?   rZ   r   r2   r   r   Sizer[   r5   r   )r   r   r   r   r   r   r  r  r   r   r   r   r   r   r   r   r   r   r@   rA   local_expertstriton_weight_tensorr  s                          r   load_and_swizzle_mxfp4r  P  s   NNJJJw%E**]D1KJJ5MJJ5M::fd#D**]D1K- 5:&+;ZP]_cep 
 1 1#q 9! <ehh>P>PQ\lq>P>rs!F'*K!F'*KV[1FV[1F}}!!V+0B0Bf0L &A>)#[[8P8PST8TVXYF#[[F<T<TXY<YZF=&-@EI$*M=1=1ZZ&&}5 9F((R0&2B2B2r2J:6(, >)16&(:(:F<T<TWX<XY2(. 27&(@(@&BTBTU2(.
 &:;f-.#YcYeHfg ,,k54 s   0K

K	c           	      n   |g }| j                         D ]  \  }}|j                  |       t        ||      s|j                  d       6|j                  j
                  dk(  r9|j                  s-t               5  t        |      | j                  |<   d}d d d        |j                  j
                  dk(  r$|j                  sddl
m}  |t        |      |_        t        t        |j!                                     dkD  rt#        ||||||      \  }	}|j                  d        | |fS # 1 sw Y   xY w)Nr+   GptOssExpertsT	GptOssMLPr   )
MethodType)has_been_replacedri   )named_childrenappendr   poprj   r   r   r   rP   _modulestypesr  r   r   lenlistchildren_replace_with_mxfp4_linear)
r   modules_to_not_convertr   quantization_configr  ri   namer   r  _s
             r   r  r    s1    ,,. !f%$%57MN  $$$7@S@^@^#% )'9&'At$$(!) $$3<O<Z<Z('V<FNtFOO%&'!+#=& #"3$ A  	R -!. ####) )s   :D++D4	c                     |j                   r| S |dgn|}|j                  |j                  |j                         t        t	        |            }t        | ||||      \  } }|st        j                  d       | S )Nlm_head)ri   zYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r   r  extendr  setr  loggerwarning)r   r  r   r  ri   r  s         r   replace_with_mxfp4_linearr'    s     %%,B,Ji[Pf11=%%&9&P&PQ!#&<"=>9 E 	
 Lr   )NNNFN)NNNN)utilsr   r   r   r   r   
accelerater   r   
get_loggerr   r%  r7   r   r   r   r    r   r   rN   ModulerP   r   r   r   r   r  r  r'   r   r   <module>r-     s    I H - 	 
		H	%
*: &4 ;;	4
 4 \\4n@# @#J<o~%$#-LAL  "$N  r   