
    rh2                     $   d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d d	l*m+Z+ d d
l,m-Z- ddgZ. G d de      Z/ G d de
      Z0y)    N)AnyOptional)narrow_tensor_by_index)FsspecReaderFsspecWriter)_gen_file_name
_get_dtype_get_safetensors_file_metadata_HFStorageInfo_metadata_fnCUSTOM_METADATA_KEYDATA_KEYDATA_OFFSETS_KEYDEFAULT_EXTRA_METADATA_KEY	DTYPE_KEYSAVED_OFFSETS_KEY	SHAPE_KEYSUFFIX)SerializationFormat)ChunkStorageMetadataMetadataMetadataIndexStorageMetaTensorPropertiesTensorStorageMetadata)LoadPlanLoadPlannerReadItemSavePlanSavePlanner	WriteItem)WriteResult)FutureHuggingFaceStorageWriterHuggingFaceStorageReaderc                       e Zd ZdZ	 	 	 ddedeeeef      dee   deddf
 fdZ	d	e
e   de
e   fd
Zdededee
e      f fdZdede
e
e      ddfdZdeeeef      de
e   deee
e   f   fdZedefd       Z xZS )r$   z
    A writer that writes to a huggingface repository in the huggingface format.
    Uses Fsspec back-end to communicate with back-end storage.
    Fsspec registration of the storage solution is required.
    Npathfqn_to_index_mappingtokensave_shardedreturnc                     |!t         |   ||t        j                         nt         |   |t        j                         || _        || _        y)a  
        Initialize the huggingface writer pointing to path.

        Args:
            path: hf directory where the checkpoint will be read from.
                  Needs to have .safetensors files, but can be from any fsspec supported storage,
                  including localFS and hf://.
            fqn_to_index_mapping: A mapping from tensor FQN to the index of the file that the tensor should be written to.
                              Indices are from 1 to N, where N is the number of files. If not provided,
                              the tensors will be written to a single file. If none, then all the tensors on the
                              same rank will be written to the same file.
            token: The token to use to authenticate with huggingface hub.
            save_sharded: If True, save the checkpoint as a sharded checkpoint where every rank saves its own shard.
                        Default is False which assumes full tensors are being saved.

        N)r'   r)   serialization_format)r'   r-   )super__init__r   SAFETENSORS_fqn_to_index_mapping_save_sharded)selfr'   r(   r)   r*   	__class__s        z/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/torch/distributed/checkpoint/hf_storage.pyr/   z!HuggingFaceStorageWriter.__init__8   s`    0 G%8%D%D   G%8%D%D   @T")    plansc                     g }t        |d      D ]Y  \  }}i }| j                  | j                  |d<   | j                  r||d<   |j                  t	        j
                  ||             [ |S )N   )startr(   shard_index)storage_data)	enumerater1   r2   appenddataclassesreplace)r3   r7   	new_plansiplanr<   s         r5   prepare_global_planz,HuggingFaceStorageWriter.prepare_global_plan^   s{    	 a0 	SGAt+-L))57;7Q7Q34!!./]+[00LQR	S r6   rC   plannerc                 &   t        |j                        dk(  rt               }|j                  g        |S |j                  }d }d }d|v r|d   }d|v r|d   }| j                  ||j                        }|t        |j                               nd}t        j                         }	|j                         D ]J  \  }
}t        |
||      }|	j                  | j                  j                  | j                  |      ||f       L t        | A  ||	      S )Nr   r(   r;   r9   )lenitemsr#   
set_resultr<   _split_by_storage_planmaxvaluesqueueQueuer   putfsconcat_pathr'   r.   _write_data)r3   rC   rE   futr<   storage_planr;   bucketshighest_index
file_queue
file_indexwrite_items	file_namer4   s                r5   
write_dataz#HuggingFaceStorageWriter.write_datak   s   
 tzz?a (CNN2J (,'8'815%)!\1'(>?LL(&}5K--lDJJG6B6NL//12TU"'++-
'.}} 	#J&z=+NINN$$TYY	:I{S	 w"7J77r6   metadataresultsc                 <   | j                   ry i }i }d}|D ]z  }|j                  |D ci c]-  }|j                  j                  |j                  j
                  / c}       |t        |D cg c]  }|j                  j                   c}      z  }| d|i|d<   ||d<   | j                  j                  | j                  t               }| j                  j                  |d      5 }	t        j                  ||	d       d d d        y c c}w c c}w # 1 sw Y   y xY w)Nr   
total_sizer\   
weight_mapw   )indent)r2   updateindexfqnr<   relative_pathsumlengthrP   rQ   r'   r   create_streamjsondump)
r3   r\   r]   metadata_to_write
storage_mdr_   wr_listwrmetadata_pathmetadata_files
             r5   finishzHuggingFaceStorageWriter.finish   s   

 	JGGNOr<<<O #H"r55HIIJ		J
 *6z(B*%*4,'++DII,IWW""=#6 	B-II'qA	B 	B PH
	B 	Bs   2D
+D&DDrT   rH   c                     |d|iS i }|D ]<  }|j                   j                  }||   }||vr|g||<   )||   j                  |       > |S )Nr9   )re   rf   r>   )r3   rT   rH   rU   itemkeyidxs          r5   rJ   z/HuggingFaceStorageWriter._split_by_storage_plan   sk     u: 	*D**..Cs#C'! $v##D)	* r6   c                     t         S N)r   )r3   s    r5   rq   z&HuggingFaceStorageWriter.metadata_path   s    r6   )NNF)__name__
__module____qualname____doc__strr   dictintboolr/   listr   rD   r    r#   r"   r[   r   rs   r!   rJ   propertyrq   __classcell__r4   s   @r5   r$   r$   1   s    :>#"$*$* 'tCH~6$* }	$*
 $* 
$*Lh DN 88 8 
[!	"	8>Bx B$tK7H2I Bd B&$T#s(^4=A)_	c4	?"	#& s  r6   c                   ^     e Zd ZdZddedee   ddf fdZdedede	d   fd	Z
defd
Z xZS )r%   z
    A reader that reads from a huggingface repository in the huggingface format.
    Uses in Fsspec back-end to communicate with storage.
    Fsspec registration of the storage solution is required.
    Nr'   r)   r+   c                 N    |t         |   ||       yt         |   |       y)ai  
        Initialize the huggingface reader pointing to path.

        Args:
            path: hf directory where the checkpoint will be read from.
            Needs to have .safetensors file, but can be from any fsspec supported storage,
            including localFS and hf://.
            token: The token to use to authenticate with huggingface hub.
        N)r'   r)   )r'   )r.   r/   )r3   r'   r)   r4   s      r5   r/   z!HuggingFaceStorageReader.__init__   s-     G$e4G$'r6   rC   rE   c                 f   ddl m} i }|j                  D ]H  }| j                  |j                     }|j
                  }|j                  |g       j                  |       J |j                         D ]  \  }}| j                  j                  |d      5 }	 ||	j                               }
|
D ci c]  }|d   |d    }}|D ]2  }| j                  |j                     }||j                  j                     t           }t        j                  ||j                         }|j#                  |j$                        }t'        ||j(                  |j*                        }|j-                  |      j/                         }|j1                         |j1                         k(  s6J d|j                   d|j1                          d|j1                                 |j3                  |       |j5                  ||       5 	 d d d         t7               }|j9                  d        |S c c}w # 1 sw Y   xY w)	Nr   )deserializerbr9   dtypezreq z mismatch sizes z vs )safetensorsr   rH   r<   storage_indexrg   
setdefaultr>   rP   rj   read
dest_indexrf   r   torch
frombufferr   reshapeshaper   storage_offsetslengthsresolve_tensordetachsizecopy_commit_tensorr#   rI   )r3   rC   rE   r   per_file	read_itemitem_mdrZ   reqsstreamdeserializedtensor_infodeserialized_dictreqtensor_bytestensortarget_tensorrS   s                     r5   	read_dataz"HuggingFaceStorageReader.read_data   s    +.0 	AI&*&7&7	8O8O&PG--I	2.55i@	A
  (~~/ 	>OIt&&y$7 >6  +6;;=9FR@7BKNKN2@! @   >C"//0A0ABG#4S^^5G5G#H#RL"--$%mmF $^^GMM:F3 3 3S[[F %,$:$:3$?$F$F$HM(--/6;;=@ s0011A-BTBTBVAWW[\b\g\g\i[jk@ "''/))#}=)>> >	>> ht
9@	> >s   H&.H!>D:H&!H&&H0	c                    i }i }g }| j                   j                  | j                        D ])  }|j                  t              s|j                  |       + |D ]  }| j                   j                  |d      5 }t        |      \  }}|j                  t              }	d }
|	r=|	j                  t              r(t        j                  |	j                  t                    }
|j                         D ]7  \  }}|t        k(  r|
|
|   t           }ndgt        |t                  z  }||vrt#        t%        t'        |t(                       t+        j,                  t/        |t            |      D cg c]
  \  }}||z    c}}      t1        t+        j,                        t+        j,                  |t                        g      ||<   n||   j2                  j                  t1        t+        j,                  |      t+        j,                  |t                               t5        ||   j6                        }t9        t        |            D ]$  }t;        ||   |t            |   ||   z         ||<   & t+        j,                  |      ||   _        |
t=        ||
|   t                 }n!t=        |dgt        |t                  z        }t?        ||t@           d   |t@           d   |t@           d   z
  t+        j,                  |t                  t'        |t(                 	      ||<   : 	 d d d         tC        ||
      }tE        |dd       tG               |_$        | jJ                  |jH                  _%        |S c c}}w # 1 sw Y   2xY w)Nr   r   r   )offsetssizes)
propertiesr   chunks)r   )rf   offsetr9   )rg   r   ri   r   r   )state_dict_metadatar<   storage_meta)&rP   lsr'   endswithr   r>   rj   r
   getr   r   rk   loadsrH   r   rG   r   r   r   r	   r   r   Sizezipr   r   r   r   rangerK   r   r   r   r   getattrr   r   load_id)r3   r   r<   safetensors_filesfilesafetensor_filefsafetensors_metadata_custom_metadatadcp_sharding_inforv   valr   savedr   rB   metadata_indexr\   s                      r5   read_metadataz&HuggingFaceStorageReader.read_metadata   sn   @B<>GGJJtyy) 	/D}}V$!((.	/  1 A	O&&= @*H*K'$a"6":":;U"V$(!"':':;N'O(,

'++,?@)% !5 : : < 6HC88  )4!23!78I!J"#s3y>':!:"553H'7&0Y&@( "' :=S^V9T!"(5v %*FN!"" !5,1JJv,>*/**S^*D!"$4+C0$ ,C077>>0 %

6 2%**S^:T
  $$7$<$A$AB!&s4y!1 RA&)$q'3y>!3Dvay3P&QDGR8=

48H+C05 )4)6 #,=c,BCT,U* *7 #QC#c)n2E,E* 4B&5"#34Q7"#34Q7#>N:OPQ:RR#jjY8(Y84L0a6@ @A	F  3%

 8^T2:$/MH!(,%c!"5@ @s    :C4N.M<=F-N<NN	ry   )rz   r{   r|   r}   r~   r   r/   r   r   r#   r   r   r   r   r   s   @r5   r%   r%      sS    (S (# ($ ( +h + + +ZUx Ur6   )1r?   rk   rM   typingr   r   r   torch.distributed._shard._utilsr   /torch.distributed.checkpoint._fsspec_filesystemr   r   &torch.distributed.checkpoint._hf_utilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   'torch.distributed.checkpoint.filesystemr   %torch.distributed.checkpoint.metadatar   r   r   r   r   r   $torch.distributed.checkpoint.plannerr   r   r   r   r    r!   $torch.distributed.checkpoint.storager"   torch.futuresr#   __all__r$   r%    r6   r5   <module>r      sz          B V    H   =   &'A
BA| AHY| Yr6   