
    rhO                         d dl Z d dlmZmZ d dlZd dlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ  e       rd dlZ ee      Zd	d
dddddde	de
d   ide
d   idZ eed   j/                               Z G d de      Z G d d      Z G d de      Z G d de      Z G d de      Z G d d e      Z G d! d"e      Z G d# d$e      Z  G d% d&e      Z! G d' d(e      Z"eeeeeee e!e"e"d)
Z#d* Z$	 	 	 d0d+ee%   d,ee&   d-e%fd.Z'd1d/Z(y)2    N)
NamedTupleOptional)tqdm   )GGUF_CONFIG_MAPPINGGGUF_TOKENIZER_MAPPING_gguf_parse_value)is_torch_available)is_gguf_available)
get_loggerversiontensor_countkv_count)r   r   r   	file_typequantization_version)r   r   )GGUFgeneral	tokenizertokenizer_config)ignoreconfigr   r   r   c                   @    e Zd ZU ej                  ed<   eed<   eed<   y)
GGUFTensorweightsnamemetadataN)__name__
__module____qualname__npndarray__annotations__strdict     {/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/modeling_gguf_pytorch_utils.pyr   r   7   s    ZZ
INr&   r   c                       e Zd ZddZd Zy)TensorProcessorNc                     |xs i | _         y Nr   )selfr   s     r'   __init__zTensorProcessor.__init__>   s    lr&   c                     t        ||i       S r+   r   r-   r   r   kwargss       r'   processzTensorProcessor.processA   s    '4,,r&   r+   )r   r   r   r.   r3   r%   r&   r'   r)   r)   =   s    #-r&   r)   c            	       p     e Zd Zd fd	Zd Z	 ddej                  dedee   dej                  fdZ	 xZ
S )	LlamaTensorProcessorc                 &    t         |   |       y Nr,   superr.   r-   r   	__class__s     r'   r.   zLlamaTensorProcessor.__init__F       'r&   c                    d|v sd|v rx| j                   j                  d      }| j                   j                  d      }d ||fv rt        ||i       S d|v r| j                  |||      }nd|v r| j                  |||      }t        ||i       S )Nz.attn_k.z.attn_q.num_attention_headsnum_key_value_heads)r   getr   _reverse_permute_weights)r-   r   r   r2   	num_headsnum_kv_headss         r'   r3   zLlamaTensorProcessor.processI   s    t!3(=>I;;??+@AL	<00!'444T!77IVt#77LY'4,,r&   r   n_headrC   returnc                     |||k7  r|}|j                   d   |z  dz  } |j                  ||dg|j                   dd   }|j                  dd      j                  |j                         S )Nr      r   )shapereshapeswapaxes)r-   r   rD   rC   dimws         r'   rA   z-LlamaTensorProcessor._reverse_permute_weightsV   sr    
 #,(>!FmmA&(A-GOOFC?W]]12->?zz!Q''66r&   r+   )r   r   r   r.   r3   r    r!   intr   rA   __classcell__r;   s   @r'   r5   r5   E   sE    (- OS
7zz
7+.
7>Fsm
7	
7r&   r5   c                   \     e Zd Zd fd	Zd Zdej                  deeef   dedefdZ	 xZ
S )	Qwen2MoeTensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   z Qwen2MoeTensorProcessor.__init__d   r<   r&   c                     d|v rE|j                  d      }|j                  d      }|r!| j                  ||||       t        |d i       S d|v rt        j                  |d      }t        ||i       S )N_exptensor_key_mappingparsed_parametersffn_gate_inp_shexpr   axis)r@   _split_moe_expert_tensorr   r    expand_dims)r-   r   r   r2   rU   rV   s         r'   r3   zQwen2MoeTensorProcessor.processg   sz    T>!',@!A &

+> ?!--g7H$Pbc!'4444' nnW15G'4,,r&   r   rV   r   rU   c                     ||   }| j                   j                  dd      }t        d|      D ]K  }|j                  dd| d      }||   }t	        j
                  t        j                  |            |d   |<   M y )Nnum_experts<   r   mlp.experts..tensors)r   r@   rangereplacetorch
from_numpyr    copy)	r-   r   rV   r   rU   	w_counteri	temp_name
exp_weights	            r'   rZ   z0Qwen2MoeTensorProcessor._split_moe_expert_tensort   s    
 "$'KKOOM26	q)$ 	\A^|A3a5HII J6;6F6FrwwzGZ6[i(3	\r&   r+   )r   r   r   r.   r3   r    r!   r$   r#   rZ   rN   rO   s   @r'   rQ   rQ   c   sC    (-
\zz
\6:39o
\MP
\fj
\r&   rQ   c                   v     e Zd Zd fd	Zd Zdej                  dedefdZdej                  dedefdZ	 xZ
S )	BloomTensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   zBloomTensorProcessor.__init__   r<   r&   c                     d|v rI| j                   d   }| j                   d   }d|v r| j                  |||      }n| j                  |||      }t        ||i       S )Nattn_qkvrD   hidden_sizeweight)r   _reverse_reshape_weights_reverse_reshape_biasr   )r-   r   r   r2   rB   n_embeds         r'   r3   zBloomTensorProcessor.process   se    H-Ikk-0G477GT44WiQ'4,,r&   r   rD   rt   c                 (   t        j                  |dd      \  }}}|j                  |||z  |      }|j                  |||z  |      }|j                  |||z  |      }t        j                  |||gd      }|j                  |dz  ||z  z  |      S )N   r   rX   r   )r    array_splitrI   stack)r-   r   rD   rt   qkvqkv_weightss           r'   rr   z-BloomTensorProcessor._reverse_reshape_weights   s     ..!!41aIIfg/9IIfg/9IIfg/9hh1ayq1""6A:F1B#CWMMr&   c                    t        j                  |d      \  }}}|j                  |||z        }|j                  |||z        }|j                  |||z        }t        j                  |||gd      j	                         }|S )Nrv   r   rX   )r    rw   rI   rx   flatten)r-   r   rD   rt   q_biask_biasv_biasqkv_biass           r'   rs   z*BloomTensorProcessor._reverse_reshape_bias   s     "$!;6(9:6(9:6(9:88VVV41=EEGr&   r+   )r   r   r   r.   r3   r    r!   rM   rr   rs   rN   rO   s   @r'   rl   rl      sN    (-
N

 
NC 
NRU 
N
RZZ 
 
s 
r&   rl   c                   &     e Zd Zd fd	Zd Z xZS )T5TensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   zT5TensorProcessor.__init__   r<   r&   c                     d }|j                  d      D ]  }|j                         st        |      } n t        ||d|i      S )Nr`   bid)splitisdigitrM   r   )r-   r   r   r2   r   chunks         r'   r3   zT5TensorProcessor.process   sH    ZZ_ 	E}}%j	 '4%66r&   r+   r   r   r   r.   r3   rN   rO   s   @r'   r   r      s    (7r&   r   c                   &     e Zd Zd fd	Zd Z xZS )GPT2TensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   zGPT2TensorProcessor.__init__   r<   r&   c                     d|v sd|v sd|v sd|v r|j                   }|dk(  rDd}|j                  di       }t        j                  t	        j
                  |            |d   |<   d }t        ||i       S )	Nzattn_qkv.weightzffn_down.weightzffn_up.weightzattn_output.weightoutput.weightzlm_head.weightrV   ra   )Tr@   rd   re   r    rf   r   )r-   r   r   r2   rV   s        r'   r3   zGPT2TensorProcessor.process   s     % D($&#t+iiG ?" $D &

+> C161A1A"'''BR1Si(.D'4,,r&   r+   r   rO   s   @r'   r   r      s    (-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )MambaTensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   zMambaTensorProcessor.__init__   r<   r&   c                     d|v rt        j                  |d      }d|v rt        j                  |       }t        ||i       S )Nzssm_conv1d.weightr   rX   ssm_a)r    r[   logr   r1   s       r'   r3   zMambaTensorProcessor.process   sD    $& nnW15Gd? ffgX&G'4,,r&   r+   r   rO   s   @r'   r   r      s    (	-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )NemotronTensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   z NemotronTensorProcessor.__init__   r<   r&   c                 .    d|v r|dz
  }t        ||i       S Nznorm.weightr   r0   r1   s       r'   r3   zNemotronTensorProcessor.process   "    D kG'4,,r&   r+   r   rO   s   @r'   r   r      s    (-r&   r   c                   &     e Zd Zd fd	Zd Z xZS )Gemma2TensorProcessorc                 &    t         |   |       y r7   r8   r:   s     r'   r.   zGemma2TensorProcessor.__init__   r<   r&   c                 .    d|v r|dz
  }t        ||i       S r   r0   r1   s       r'   r3   zGemma2TensorProcessor.process   r   r&   r+   r   rO   s   @r'   r   r      s    (
-r&   r   )
llamaqwen2moebloomt5	t5encodergpt2mambanemotrongemma2gemma3c                     || j                   vrg S | j                   |   }|j                  D cg c]%  }t        |j                  |   |j                        ' c}S c c}w r+   )fieldsdatar	   partstypes)readerfieldvalue_data_indexs       r'   
read_fieldr     sP    FMM!	MM% EX]XbXbcekk+6Dcccs   *A
model_type
num_layers	qual_namec           	         t               rt               r	ddlm}m} n t
        j                  d       t        d      || j                  j                  n|}|| j                  j                  n|}|dk(  rd}n|dk(  rd	}n|d
k(  rd}d}|j                         D ]  \  }}||k(  s|} n |t        d| d       |||      }	i }
| j                         }|D ]  }|d	k(  rd|v rt        j                  dd|      }|d}}|j!                  d      s|j!                  d      r|j#                  dd      \  }}d|z   }|	j%                  |      }|w||z   |
||z   <    | j'                         x}rW|D ]R  \  }}t)        |||| | d      }|j                         D ci c]  \  }}||
vs|| }}}|
j+                  |       T |
S c c}}w )aY  
    GGUF uses this naming convention for their tensors from HF checkpoint:
    `blk.N.BB.weight` and `blk.N.BB.bias`
    where N signifies the block number of a layer, and BB signifies the
    attention/mlp layer components.
    See "Standardized tensor names" in
    https://github.com/ggerganov/ggml/blob/master/docs/gguf.md for details.
    r   )MODEL_ARCH_NAMESget_tensor_name_mapLoading a GGUF checkpoint in PyTorch, requires both PyTorch and GGUF>=0.10.0 to be installed. Please see https://pytorch.org/ and https://github.com/ggerganov/llama.cpp/tree/master/gguf-py for installation instructions.KPlease install torch and gguf>=0.10.0 to load a GGUF checkpoint in PyTorch.Ncoherez	command-r	qwen2_moer   gemma3_textr   zUnknown gguf model_type: z in gguf-py. This might because you're using an outdated version of gguf-py package, you can install `gguf` package from source refer to https://github.com/ggerganov/llama.cpp/tree/master/gguf-py#developmentr_   zmlp.experts.\d+. z.weightz.biasr`   r   )r   )r   r
   ggufr   r   loggererrorImportErrorr   r   num_hidden_layersitemsNotImplementedError
state_dictresubendswithrsplitget_namenamed_childrenget_gguf_hf_weights_mapupdate)hf_modelr   r   r   r   r   archkeyr   name_mapgguf_to_hf_name_mapr   hf_namer   suffix	gguf_namer   childsub_maprz   r{   s                        r'   r   r     s5    13>>A	
 ghh/9/A++zJ6@6H22jJX 
	{	"
	}	$
D&,,. 
UJD |!'
| 4U U
 	
 #44H $$&J F#'(Aff0.'JGfI&'*:*:7*C">>#q1LD&6\F%%d+	2;g2EI./F$ "0022~2) 	0KD%-eZZcYdeidjjkWlmG(/X11DW;Wq!tXGX&&w/		0  Ys   G	,G	c                 \  #$ t               rt               r	ddlm}m} n t
        j                  d       t        d       ||       }|j                  }t        |j                               }t        D ci c]  }|i  }	}t        |d      d   }
t        |d      }d}d|
v rd	|v rd	}n"d
|
v sd|
v rd|	d   d<   d|
v r	dg|	d   d<   d
}n|
}d|
v rd}d|
v rSh d#d$t        #fd|j                  D              }t        $fd|j                  D              }||	d   d<   | |	d   d<   |
t        vr|t        vrt!        d|
 d      ddg}t#        d |j                  D              xs |
|v |	d   d<   |j                  j%                         D ]-  \  }}|j'                  |
|      }|j)                  d       }|d   }d j+                  |d!d       }|j,                  D cg c]%  }t/        |j0                  |   |j2                        ' }}t5        |      d!k(  r|d   }t7        |t8              r|
|v r|j'                  |
|      }t        j%                         D ]@  \  }}||v s|||   v s||   |   }|d"k(  r!|||	|   |<   ||v s0|j;                  |       B ||v st
        j=                  d#| d$|        0 |	d   d%   d&k(  rd'|	d   d%<   d(|	d   vr3|	d)   }d*|v rt5        |d*         |	d   d(<   nt
        j?                  d+       |ri |	d,<   tA        |      }|	jC                  di       }tD        jC                  |
tF              } ||-      }tI        |j                  d./      D ]  }|jJ                  }  ||j,                  |jL                        }!|jO                  |!| ||	0      }"|"jP                  }!|"jJ                  } | |vr^||    } tS        jT                  tW        jX                  |!            |	d,   | <    t5        |      dkD  rt
        j=                  d1|        |	S c c}w c c}w )2a  
    Load a GGUF file and return a dictionary of parsed parameters containing tensors, the parsed
    tokenizer and config attributes.

    Args:
        gguf_checkpoint_path (`str`):
            The path the to GGUF file to load
        return_tensors (`bool`, defaults to `False`):
            Whether to read the tensors from the file and return them. Not doing so is faster
            and only loads the metadata in memory.
    r   )
GGUFReader
dequantizer   r   zgeneral.architecturezgeneral.nameNr   mistralr   r   Tr   is_gated_actT5EncoderModelarchitecturesr   r   stablelm>   attn_k.biasattn_q.biasattn_v.biasffn_normc              3   H   K   | ]  }D ]  }||j                   v    y wr+   r   ).0tensor	bias_nameattn_bias_names      r'   	<genexpr>z'load_gguf_checkpoint.<locals>.<genexpr>  s)     mF^lmQZyFKK/m/ms   "c              3   :   K   | ]  }|j                   v   y wr+   r   )r   r   ffn_norm_names     r'   r   z'load_gguf_checkpoint.<locals>.<genexpr>  s     #^VMV[[$@#^s   use_qkv_biasuse_parallel_residualzGGUF model with architecture z is not supported yet.falconr   c              3   :   K   | ]  }d |j                   k7    yw)r   Nr   )r   r   s     r'   r   z'load_gguf_checkpoint.<locals>.<genexpr>  s     HvOv{{*Hs   tie_word_embeddingsr`   r   z1Some keys were not parsed and added into account z | r   r   r   
vocab_sizer   tokenszCan't find a way to retrieve missing config vocab_size from tokenizer parameters. This will use default value from model config class and cause unexpected behavior.ra   r,   z,Converting and de-quantizing GGUF tensors...)desc)r   r   rU   rV   z0Some keys of the GGUF file were not considered: )-r   r
   r   r   r   r   r   r   r   listkeysGGUF_TO_TRANSFORMERS_MAPPINGr   anyra   GGUF_SUPPORTED_ARCHITECTURES
ValueErrorallr   rc   r   joinr   r	   r   r   len
isinstancer#   removeinfowarningr   r@   TENSOR_PROCESSORSr)   r   r   tensor_typer3   r   rd   re   r    rf   )%gguf_checkpoint_pathreturn_tensorsmodel_to_loadr   r   r   r   reader_keysrz   rV   architecture
model_nameupdated_architecturer   r   
exceptionsgguf_keyr   r   prefix
config_keyr   r   	parameterparameter_renamesrenamed_config_keytokenizer_parametersrU   r   ProcessorClass	processorr   r   r   resultr   r   s%                                      @@r'   load_gguf_checkpointr  Z  s    13//A	
 ghh,-F]]Fv{{}%K(DE1BEEf&<=a@LFN3J ,9
#:( 
	!<6:(#N3,&<L;Mh'8#+\!*
 \!F"mfnnmm ##^v~~#^ ^6>(#N3CX?X(#$;<77<PXt<t8F\]^^ G$JHHHfL\fLf h 56
 "==..0 b%##L2FGs#qXXeABi(
]b]g]ghk"5;;{#;U[[Ihhu:?!HEeS!le&;MM,0DEE,H,N,N,P 
	1(I(**z=Nv=V/V%6v%>z%J"%+%1GL%i01CD{*&&x0
	1 {"KKKH:UXY^X_`a7b< "<0H<4A(#L1 ,X660=++8;<PQY<Z8[h'5NNe
 '))$4]C"&&x4*..|_M"&1	6>>0^_ 	TF;;D f.@.@AG&&#5"3	 ' F nnG;;D--%d+D161A1A"'''BR1Si(.'	T* ;!F{mTUy Fh is   5
P$*P))NNr   )FN))r   typingr   r   numpyr    	tqdm.autor   integrationsr   r   r	   utilsr
   utils.import_utilsr   utils.loggingr   rd   r   r   r   r   r   r   r   r)   r5   rQ   rl   r   r   r   r   r   r  r   r#   rM   r   r  r%   r&   r'   <module>r#     s    
 '   
 & 1 % 	H	 !*"

 "-F\] "5kBC$&<=O&PQ    $$@$J$O$O$QR  - -7? 7<\o \<$? $N
7 
7-/ -4-? - -o -	-O 	- "'!
"!'## d !% $	KK K 	K\Ur&   