
    rh.T                     p   d Z ddlZddlmZ ddlZddlmZ ddlZddl	m
Z
 ddlmZmZ ddlZddlmZmZ ddlmZmZ  e       rddlZ e       rdd	lmZ dd
lmZ  ej6                  e      Z	 ddZdee    dejB                  de"e ejB                  f   de dee    ejB                  ff
dZ#d Z$d Z%d Z&d Z'y)z!PyTorch - Flax general utilities.    N)UnpicklingError)
from_bytes)flatten_dictunflatten_dict   )is_safetensors_availableis_torch_available)check_torch_load_is_safelogging)	safe_open)	load_filec                 v   |st         j                  j                  |      }t        j	                  d|        |j                  d      rCi }t        |d      5 }|j                         D ]  }|j                  |      ||<    	 ddd       n\	 ddl	}t                 |j                  |dd	
      }t        j	                  dt        d |j!                         D              dd       t#        ||       }	|	S t%        ||       }	|	S # 1 sw Y   %xY w# t        t        f$ r t        j                  d        w xY w)z(Load pytorch checkpoints in a flax modelzLoading PyTorch weights from .safetensorsflax)	frameworkNr   zLoading a PyTorch model in Flax, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.cpuT)map_locationweights_onlyzPyTorch checkpoint contains c              3   <   K   | ]  }|j                           y wN)numel).0ts     {/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/modeling_flax_pytorch_utils.py	<genexpr>z=load_pytorch_checkpoint_in_flax_state_dict.<locals>.<genexpr>K   s     :e1779:es   ,z parameters.)ospathabspathloggerinfoendswithr   keys
get_tensortorchImportErrorModuleNotFoundErrorerrorr
   loadsumvalues"convert_pytorch_state_dict_to_flax*convert_pytorch_sharded_state_dict_to_flax)

flax_modelpytorch_checkpoint_path
is_shardedallow_missing_keyspt_pathpt_state_dictfkr%   flax_state_dicts
             r   *load_pytorch_checkpoint_in_flax_state_dictr7   0   s9   
 ''//"9:3G9=>N+M7f5 7 7A'(||AM!$77 7 %&&EJJwUQUVMKK6s:emNbNbNd:e7efg6hhtuv<]JW  EE\^hi-7 7  !45 %
 s   *DD D&D8pt_tuple_key	pt_tensorrandom_flax_state_dictmodel_prefixreturnc                    dt         t           dt        ffd}| dd dz   }| d   dv r ||      r||fS | dd dz   }| d   d	k(  r ||       s||fS | dd d
z   }| d   dk(  r ||       s||fS | dd dz   }| d   dk(  r ||      r||fS | dd dz   }| d   dk(  r/|j                  dk(  r  ||       s|j	                  dddd      }||fS | dd dz   }| d   dk(  r ||       s|j
                  }||fS | dd dz   }| d   dk(  r||fS | dd dz   }| d   dk(  r||fS d}| ddd   dk(  r	| d   dz   }n| ddd   dk(  r| d   dz   }|| dd |fz   }||fS | |fS )zYRename PT weight names to corresponding Flax weight names and reshape tensor if necessarykeyr<   c                 D    t        t              | f| z   hz        dkD  S )zAChecks if `key` of `(prefix,) + key` is in random_flax_state_dictr   )lenset)r>   r;   r:   s    r   is_key_or_prefix_key_in_dictzCrename_key_and_reshape_tensor.<locals>.is_key_or_prefix_key_in_dict\   s+    3-.#7L1MMNQRRR    N)scale)weightgamma)meanrunning_mean)varrunning_var)	embeddingrF   )kernel         r   r   rF   rG   )biasbeta)parametrizations	original0_g)rU   	original1_v)tuplestrboolndim	transposeT)r8   r9   r:   r;   rB   renamed_pt_tuple_keynames     ``   r   rename_key_and_reshape_tensorrc   T   sX   S%* S S
 (,z9B..3OPd3e#Y.. (,y8B>)2N|2\#Y.. (,x7B=(1Ml1[#Y.. (,~=B8#(DEY(Z#Y.. (,{:B8#	!(;D`amDn''1a3	#Y.. (,{:B8#,H,VKK	#Y.. (,{:B7"#Y.. (,y8B6!#Y.. DBEE??B$&	be!e	 A	AB$&+CR0D7:#Y..""rC   c           
         t               xr: t        t        t        | j	                                     t
        j                        }|rt
        j                  nd}| j                         D ci c]  \  }}||j                   }}}|rX| j                         D ]E  \  }}|j                  |k(  r|j                         }|j                         j                         | |<   G |j                  }d|j                  v r|j                  d   }n|j                  }t        |      }	d|j                  v r)t        |j                  d         }
|	j!                  |
       i }||vxr$ || D ch c]  }|j#                  d      d    c}v }||v xr$ || D ch c]  }|j#                  d      d    c}v}| j                         D ]}  \  }}t%        |j#                  d            }||   |k(  }|d   |k(  }|r|r|dd  }t'        |||	|      \  }}|f|z   |	v }|r|r|f|z   }||	v rH|j(                  |	|   j(                  k7  r,t+        d| d|	|   j(                   d	|j(                   d      d|j                  v rd
|d   v sd|d   v rt-        j.                  |      |d|z   <   d|d   v r|j1                  |d        |st-        j.                  |      n$t-        j.                  |t,        j                        |d|z   <   ?|st-        j.                  |      n$t-        j.                  |t,        j                        ||<    t3        |      S c c}}w c c}w c c}w )Nbfloat16paramsbatch_stats.r   r   1PyTorch checkpoint seems to be incorrect. Weight  was expected to be of shape 	, but is rH   rD   rJ   rg   num_batches_trackeddtyperf   )r	   
isinstancenextiterr+   r%   Tensorre   itemsro   floatr   numpybase_model_prefixrf   r   updatesplitr[   rc   shape
ValueErrorjnpasarraypopr   )r3   r.   from_binre   r5   vweight_dtypesr;   flax_model_paramsr:   flax_batch_statsr6   $load_model_with_head_into_base_model$load_base_model_into_model_with_headpt_keyr9   r8   is_bfloat_16has_base_model_prefixflax_keyflax_tensorrequire_base_model_prefixs                         r   r,   r,      s   !#d
4]=Q=Q=S8T3UW\WcWc(dH!)u~~zH,9,?,?,ABDAqQZBMB!'') 	/DAqww("GGI uuw}}M!		/ //L :$$$&--h7&--)*;< 
)))'
(9(9-(HI%%&67O,8@Q,Q ,-@Qa@@ ) -9<M,M ,mDQWWS\!_DD )
 +002 ,	V\\#./$V,8 !-Q< ?/4I'+L !>)%;\!
+
 &2Oh$>BX$X!/4M$1H--  $:8$D$J$JJ GxOl-h7==>iHYHYGZZ[]  J---"%(2,)>?B{{;?W 08 ;<$4##Hd3 1=K(#++kadamamBn K(23 1=K(#++kadamamBn H%U,\ /**_ C4 A Es   .M'MMc                    dd l }i }| D ]6  }t                 |j                  |d      }|j                         D ci c]  \  }}||j                   }}}|j                         D ci c]M  \  }}||j                  |j
                  k7  r|j                         n|j                         j                         O }}}|j                  }	d|j                  v rB|j                  d   }
t        |
      }|j                  t        |j                  d                n|j                  }
t        |
      }|	|
vxr$ |	|D ch c]  }|j                  d      d    c}v }|	|
v xr$ |	|D ch c]  }|j                  d      d    c}v}|j                         D ]  \  }}t        |j                  d            }||   |j
                  k(  }|d   |	k(  }|r|r|dd  }t        ||||	      \  }}|	f|z   |v }|r|r|	f|z   }||v rH|j                  ||   j                  k7  r,t!        d| d	||   j                   d
|j                   d      d|j                  v rd|d   v rt#        j$                  |      |d|z   <   d|d   v rt#        j$                  |      |d|z   <   d|d   v r|j'                  |d        #|st#        j$                  |      n$t#        j$                  |t"        j
                        |d|z   <   g|st#        j$                  |      n$t#        j$                  |t"        j
                        ||<    9 t)        |      S c c}}w c c}}w c c}w c c}w )Nr   T)r   rg   rf   rh   r   ri   rj   rk   rH   rD   rl   rJ   rm   rn   rp   )r%   r
   r)   ru   ro   re   rw   rv   rx   rf   r   ry   rz   r[   rc   r{   r|   r}   r~   r   r   )shard_filenamesr.   r%   r6   
shard_filer3   r5   r   r   r;   r   r:   r   r   r   r9   r8   r   r   r   r   r   s                         r   r-   r-      s    O% K
 ""

:DA0=0C0C0EF1AGGFFYfYlYlYn
QUQRTUAAGGu~~5qwwy1779??;LL
 
 "33 J--- * 1 1( ;%12C%D"")),z7H7H7W*XY * 1 1%12C%D"0<DU0U 0
mDQWWS\!_DD 	- 1=@Q0Q 0
- HQa HH 	- "/!4!4!6 /	FI c!23L(0ENNBL %1O|$C!38M+AB/ %Bi)?%!Hk *6((BF\(\%38Q(?X511$$(>x(H(N(NN$KF8Sp1(;AAB)KL]L]K^^_a  
 1 11Xb\)CF;;{C[O$4x$?@HRL(CF;;{C[O$4x$?@(HRL8#''$7 5ACKK,ckkR]eheqeqFr  h 67 5ACKK,ckkR]eheqeqFr  )[/	9KX /**Q G
" E !Is   M+AM>M(Mc                    t         j                  j                  |      }t        j	                  d|        t        t        d| j                  j                  z         }|j                  d      rt        |      }t        |d      }n1t        |d      5 }	 t        ||j                               }	 d	d	d	       t#        |       S # t        $ r t!        d| d      w xY w# 1 sw Y   1xY w)
(Load flax checkpoints in a PyTorch modelzLoading Flax weights from Flaxr   rh   )seprbzUnable to convert z  to Flax deserializable object. N)r   r   r   r    r!   getattrtransformers	__class____name__r"   safe_load_filer   openr   readr   OSError"load_flax_weights_in_pytorch_model)modelflax_checkpoint_pathflax_clsr6   state_fs        r   %load_flax_checkpoint_in_pytorch_modelr   L  s    77??+?@
KK,-A,BCD |Veoo.F.F%FGH $$^4()=>(cB&- 	kk",Xw||~"F	k .e_EE # k 23G2HHhijjk	k 	ks   C"CCC""C+c                 

   	 ddl }t        t        j                  j                  d |            j                         }t        |      r6t        j                  d       t        j                  j                  d |      }t        |      }| j                         }| j                  |v xr. | j                  |D ch c]  }|j                  d      d    c}v}| j                  |vxr. | j                  |D ch c]  }|j                  d      d    c}v }g }	t        |j!                               }
|j#                         D ]Q  \  }}|d   | j                  k(  }dj%                  | j                  f|z         |v }|r|r|dd }n|r|r| j                  f|z   }|d	   d
k(  rA|j&                  dk(  r2dj%                  |      |vr|dd	 dz   }t)        j*                  |d      }n_|d	   d
k(  r(dj%                  |      |vr|dd	 dz   }|j,                  }n/|d	   dv r	|dd	 dz   }nd|d	   v r	|dd	 dz   }nd|d	   v r|dd	 dz   }d|v rdj%                  |dd       }ndj%                  |      }i }|D ]b  }|j                  d      }d}|ddd   ddgk(  r	|d   dz   }n|ddd   ddgk(  r|d   dz   }|D|dd |gz   }dj%                  |      }|||<   d ||v r||   }||v r|j.                  ||   j.                  k7  r,t1        d| d||   j.                   d|j.                   d      t3        |t4        j6                        st5        j8                  |      n|} |j:                  |      ||<   |
j=                  |       A|	j?                  |       T | jA                  |       tC        |
      }
tE        |	      dkD  r_t        j                  d| jF                  jH                   d |	 d!| jF                  jH                   d"| jF                  jH                   d#	       n-t        j                  d$| jF                  jH                   d%       tE        |
      dkD  r2t        j                  d&| jF                  jH                   d'|
 d(       | S t        j                  d)| jF                  jH                   d*| jF                  jH                   d+       | S # t        t        f$ r t        j	                  d        w xY wc c}w c c}w ),r   r   NzLoading a Flax weights in PyTorch, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.c                 <    | j                   t        j                  k(  S r   )ro   r}   re   )xs    r   <lambda>z4load_flax_weights_in_pytorch_model.<locals>.<lambda>p  s    CLLAX rC   zFound ``bfloat16`` weights in Flax model. Casting all ``bfloat16`` weights to ``float32`` before loading those in PyTorch model.c                 ~    | j                   t        j                  k(  r| j                  t        j
                        S | S r   )ro   r}   re   astypenpfloat32rp   s    r   r   z4load_flax_weights_in_pytorch_model.<locals>.<lambda>y  s'    8T6==4 Z` rC   rh   r   rD   rM   rN   rQ   )rP   rO   r   r   )rE   rL   rH   )rI   rJ   )rK   rg   rT   rO   rU   rV   rW   rX   rY   rZ   z.Flax checkpoint seems to be incorrect. Weight rj   rk   zQSome weights of the Flax model were not used when initializing the PyTorch model z: z,
- This IS expected if you are initializing z from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).
- This IS NOT expected if you are initializing z from a Flax model that you expect to be exactly identical (e.g. initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).z3All Flax model weights were used when initializing z.
zSome weights of zE were not initialized from the Flax model and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the weights of z were initialized from the Flax model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.)%r%   r&   r'   r    r(   r   jax	tree_utiltree_mapr+   anywarning
state_dictrx   rz   rA   r#   ru   joinr^   r}   r_   r`   r{   r|   rq   r   ndarrayr~   
from_numpyremoveappendload_state_dictlistr@   r   r   )pt_model
flax_stater%   is_type_bf16r6   pt_model_dictr5   r   r   unexpected_keysmissing_keysflax_key_tupler   r   r   r   special_pt_namesr>   key_componentsrb   key_to_checks                        r   r   r   b  s      6 67XZd efmmoL
< 	5	
 ]]++`bl

 #:.O'')M,4,F,F*,T ,""M*Rq1773<?*RR ) -5,F,Fj,X ,""&N1qwws|A&NN )
 O}))+,L'6'<'<'> A-# .q 1X5O5O O$'HHh.H.H-J^-[$\`m$m! 04I+AB/N16O&88:^KN ")k.>.>!.CQ_H`huHu+CR0;>N--\BKB8+0HP]0]+CR0;>N%--KB#99+CR0;>N ~b))+CR03DDNnR((+CR03CCNJ&xxqr 23Hxx/H   
	5C YYs^NDbe!e$);[(II%b)D0A&+={*KK%b)D0!/!4v!="xx714 .
	5 '''1H}$  M($;$A$AA D^DT U&&3H&=&C&C%DIkN_N_M``ac  >HUWU_U_=`bjj5fq*:%*:*:;*Gh'##H- ""8,CA-F ]+ %L
?a""++,B.? @""++, -88@8J8J8S8S7T U9	9	
 	LXM_M_MhMhLiilmn
<1x11::; <)N +55	
 O 	!("4"4"="=!> ?##+#5#5#>#>"??ik	
 OI ,- 	

 	2 +S 'Os   S 9S;7T &S8)F)(__doc__r   pickler   r   	jax.numpyrw   r}   r   flax.serializationr   flax.traverse_utilr   r   r    r   r	   utilsr
   r   r%   safetensorsr   safetensors.flaxr   r   
get_loggerr   r    r7   r[   r\   r   dictrc   r,   r-   r   r    rC   r   <module>r      s    ( 	 " 
   ) ;  : 4 %< 
		H	% IN!H@#*@#zz@# !ckk!12@# 	@#
 Cj"**@#FT+xQ+rF,IrC   