
from collections import defaultdict
from collections.abc import Sequence
from typing import cast, Optional

import torch
import torch.distributed._functional_collectives as funcol
import torch.distributed.tensor._api as dtensor
from torch._prims_common import ShapeType
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor._dtensor_spec import DTensorSpec
from torch.distributed.tensor.placement_types import (
    _StridedShard,
    Partial,
    Placement,
    Replicate,
    Shard,
)


def _explicit_order_placements(
    mesh_shape: ShapeType, placements: Sequence[Placement]
) -> list[tuple[int, Placement]]:
    """
    Replace Strided Shards with regular shards in an adjusted order.

    Returns a list of (mesh_dim, placement) tuples where the list order is the sharding order.

    ex.
    [Shard(0), _StridedShard(0, split_factor=2), Shard(0)] ->
    [(0, Shard(0)), (2, Shard(0)), (1, Shard(0))]

    """
    if len(placements) != len(mesh_shape):
        raise RuntimeError(
            f"Expected one placement per mesh dim, but found {len(placements)} "
            f"placements and {len(mesh_shape)} mesh dims."
        )
    ordered = []
    # strided placements are deferred, keyed by the tensor dim they shard,
    # until the regular Shard that ends the strided part for that dim is seen
    deferred_strided_placements = defaultdict(list)
    strided_part_ended_for_dim = set()
    for mesh_dim, p in enumerate(placements):
        if isinstance(p, _StridedShard):
            deferred_strided_placements[p.dim].append((mesh_dim, p))
        else:
            ordered.append((mesh_dim, p))
            if isinstance(p, Shard):
                if p.dim in strided_part_ended_for_dim:
                    raise NotImplementedError(
                        "Strided sharding does not allow Shard() to appear after "
                        f"the strided part has ended. {p} at mesh dim {mesh_dim} "
                        f"in {placements} violates this assumption."
                    )
                if p.dim in deferred_strided_placements:
                    strided_part_ended_for_dim.add(p.dim)
                    strided_placements = deferred_strided_placements.pop(p.dim)
                    aggregate_size = mesh_shape[mesh_dim]
                    while len(strided_placements) > 0:
                        strided_mesh_dim, strided = strided_placements.pop()
                        if strided.split_factor != aggregate_size:
                            raise RuntimeError(
                                "Can only convert _StridedShard to ordered Shard if "
                                f"split_factor({strided.split_factor}) == "
                                f"aggregate mesh size ({aggregate_size})"
                            )
                        aggregate_size *= mesh_shape[strided_mesh_dim]
                        ordered.append((strided_mesh_dim, Shard(p.dim)))

    return ordered


def compute_local_shape_and_global_offset(
    global_shape: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    """
    Compute the local tensor shape and the global offsets into the original tensor
    of a DTensor on its current global rank. This is useful for checkpointing purposes.

    Example:
    global_tensor = [[0,  1,  2,  3,  4], sharded on mesh (DP=2, TP=2) with (Shard(1), Shard(1))
                     [10, 11, 12, 13, 14]]

    This table shows the return value of local_shape and global_offset for each rank.
    (`local_tensor` is for illustration only).

    Note how the first coordinate of global_offset is always 0, corresponding to tensor dim 0 being replicated.

    Rank        local_tensor        local_shape     global_offset
    -------------------------------------------------------------
    0           [[0, 1],            (2, 2)          (0, 0)
                 [10, 11]]

    1           [[2],               (2, 1)          (0, 2)
                 [12]]

    2           [[3],               (2, 1)          (0, 3)
                 [13]]

    3           [[4],               (2, 1)          (0, 4)
                 [14]]

    Args:
        global_shape (ShapeType): The global shape of the DTensor.
        mesh (:class:`DeviceMesh`): The device mesh this DTensor is distributed on.
        placements (Sequence[:class:`Placement`]): The placements of the DTensor.

    Return:
        local_shape: the shape of the DTensor's _local_tensor on the current rank.
        global_offset: a tuple of offsets for each dimension of the global tensor shape,
        identifying how this shard fits into the global tensor in each dimension.

    """
    return _compute_local_shape_and_global_offset(
        global_shape, mesh.shape, mesh.get_coordinate(), placements
    )


def _compute_local_shape_and_global_offset(
    global_shape: ShapeType,
    mesh_shape: ShapeType,
    my_coordinate: Optional[list[int]],
    placements: Sequence[Placement],
) -> tuple[tuple[int, ...], tuple[int, ...]]:
    ordered_placements = _explicit_order_placements(mesh_shape, placements)

    if my_coordinate is None:
        # if this rank is not in the mesh, return an empty shape and offset
        return ((), ())

    local_shape = list(global_shape)
    global_offset = [0] * len(global_shape)
    for mesh_dim, placement in ordered_placements:
        mesh_dim_size = mesh_shape[mesh_dim]
        if isinstance(placement, Shard):
            shard_dim = placement.dim
            local_offset = [0] * len(global_shape)
            assert shard_dim < len(local_shape), (
                f"Sharding dim {shard_dim} greater than tensor ndim {len(local_shape)}"
            )
            shard_size, shard_offset = placement._local_shard_size_and_offset(
                local_shape[shard_dim],
                mesh_dim_size,
                my_coordinate[mesh_dim],
            )

            local_shape[shard_dim] = shard_size
            local_offset[shard_dim] = shard_offset

            if shard_size == 0:
                # Special case: use a standardized out-of-bounds offset for
                # zero-sized shards so it cannot conflict with any real offset.
                global_offset[shard_dim] = global_shape[shard_dim]
            else:
                # On a given dimension, if local_offset[shard_dim] is smaller than
                # global_offset[shard_dim], this dimension has already been sharded
                # by a previous placement. In that case we add local_offset[shard_dim]
                # to the existing global_offset[shard_dim] instead of replacing it.
                if global_offset[shard_dim] <= local_offset[shard_dim]:
                    global_offset[shard_dim] = local_offset[shard_dim]
                else:
                    global_offset[shard_dim] += local_offset[shard_dim]

    return tuple(local_shape), tuple(global_offset)


def compute_global_tensor_info(
    tensor: torch.Tensor, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[list[int], list[int]]:
    """
    Compute the global size and stride of a DTensor from the given local tensor.
    The local size is multiplied by `world_size` per Sharding dim.
    The local stride is multiplied by `world_size` per Sharding dim, as long as the
    dimension is outside the sharding dim.

    For example, if we have a local tensor with size (4, 8, 2) and stride (16, 1, 8).
    If the DTensor placements are [Shard(2)] and world_size is 2;
    then the global size is (4, 8, 4) and stride is (16 * 2, 1, 8).

    Args:
        tensor (:class:`torch.Tensor`):
            Local tensor which DTensor will be constructed from.
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: A List of int which specifies the size of DTensor which build
            on top of the local tensor.
        tensor_stride: A List of int which specifies the stride of DTensor.
    """
    tensor_shape = list(tensor.size())
    tensor_stride = list(tensor.stride())
    for idx, placement in enumerate(placements):
        mesh_dim_size = mesh.size(idx)
        if placement.is_shard():
            shard_placement = cast(Shard, placement)
            if shard_placement.dim < 0:
                raise AssertionError(
                    "Shard placements should have negative dims normalized in "
                    f"the user-facing APIs: {shard_placement}"
                )
            shard_dim = shard_placement.dim

            assert shard_dim < tensor.ndim, (
                f"Sharding dim {shard_dim} greater than tensor ndim {tensor.ndim} "
                f"for placement number {idx}."
            )

            local_dim_size = tensor_shape[shard_dim]
            tensor_shape[shard_dim] = local_dim_size * mesh_dim_size

            # recover the global stride by scaling every stride that is larger
            # than the local stride on the sharded dimension
            for i in range(len(tensor_stride)):
                if i != shard_dim and tensor_stride[i] >= tensor_stride[shard_dim]:
                    # rescale the stride by the shard size
                    tensor_stride[i] = tensor_stride[i] * mesh_dim_size
        elif not isinstance(placement, (Replicate, Partial)):
            raise RuntimeError(f"placement type {type(placement)} not supported!")
    return tensor_shape, tensor_stride


def compute_global_tensor_shape(
    shape: torch.Size, mesh: DeviceMesh, placements: Sequence[Placement]
) -> torch.Size:
    """
    Compute the global size of a DTensor from the given local tensor shape,
    the mesh and placements. Different from `compute_global_tensor_info`,
    which assumes sharding is even, this util allgathers local shards' shapes
    from all ranks and thus can support uneven sharding.
    NOTE: Currently this function only supports 1D mesh.

    Args:
        shape (:class:`torch.Size`):
            Shape of the local tensor
        mesh (:class:`DeviceMesh`):
            Object which describes the mesh topology
            of devices for the DTensor.
        placements (Sequence[:class:`Placement`]):
            The attribute of the DTensor that describes its layout
            on the mesh topology.

    Return:
        tensor_shape: Shape of the global DTensor.
    """
    if len(placements) != 1:
        raise NotImplementedError(
            "compute_global_tensor_shape only supports 1 placement for now."
        )
    if len(placements) != mesh.ndim:
        raise RuntimeError(
            f"Expected one placement per mesh dim, "
            f"but found {len(placements)} placements and {mesh.ndim} mesh dims."
        )

    if isinstance(placements[0], Replicate):
        return shape
    elif isinstance(placements[0], Shard):
        local_shape = torch.tensor(list(shape))
        gathered_shaped_tensors = [
            torch.empty_like(local_shape, device=local_shape.device)
            for _ in range(mesh.size())
        ]
        funcol.all_gather_inplace(gathered_shaped_tensors, local_shape, mesh)
        sharded_dim_sum = 0
        shard_dim = placements[0].dim
        other_dims = [d for d in range(len(shape)) if d != shard_dim]
        for shape_tensor in gathered_shaped_tensors:
            if not torch.equal(local_shape[other_dims], shape_tensor[other_dims]):
                raise RuntimeError(
                    "Non-sharded dimensions should have identical size across ranks."
                )
            shape_tensor_list = shape_tensor.tolist()
            sharded_dim_sum += shape_tensor_list[shard_dim]
        global_shape = list(shape)
        global_shape[shard_dim] = sharded_dim_sum
        return torch.Size(global_shape)
    else:
        raise NotImplementedError(
            f"Placement type {type(placements[0])} not supported."
        )


def try_find_mesh_from_args(
    op_call: torch._ops.OpOverload, args: Sequence[object]
) -> DeviceMesh:
    """
    Find the device mesh object from args.
    It returns None if no mesh is found.
    NOTE: we can optimize this search if needed
    """
    for arg in args:
        if isinstance(arg, (dtensor.DTensor, DTensorSpec)):
            return arg.device_mesh
        elif (
            isinstance(arg, (list, tuple))
            and len(arg) > 0
            and isinstance(arg[0], (dtensor.DTensor, DTensorSpec))
        ):
            return arg[0].device_mesh

    raise ValueError(f"Cannot find device mesh from args for op : {op_call}.")


def compute_local_stride(
    global_stride: ShapeType, mesh: DeviceMesh, placements: Sequence[Placement]
) -> tuple[int, ...]:
    """
    Compute the stride of a local tensor shard, given the global stride of the DTensor.
    NOTE: Currently this function is assuming the DTensor is evenly shardable.
    """
    stride_divisors = [1] * len(global_stride)
    for mesh_idx, p in enumerate(placements):
        if p.is_shard():
            i = cast(Shard, p).dim
            # tensor dimension i is sharded on mesh dimension mesh_idx,
            # so all strides larger than stride[i] must be divided by the
            # size of that mesh dimension
            for j in range(len(global_stride)):
                if global_stride[j] > global_stride[i]:
                    stride_divisors[j] *= mesh.size(mesh_idx)
    return tuple(
        global_stride[i] // stride_divisors[i] for i in range(len(global_stride))
    )


def normalize_to_torch_size(size) -> torch.Size:
    """
    Unify variable types of size argument to torch.Size
    Acceptable types include:
        int, Sequence[int], Tuple[int], Tuple[Sequence[int]],
        or torch.Size
    """
    if isinstance(size, torch.Size):
        return size

    if isinstance(size, int):
        torch_size = [size]
    elif len(size) == 1 and isinstance(size[0], Sequence):
        torch_size = list(size[0])
    else:
        torch_size = list(size)
    return torch.Size(torch_size)