
    rh                        d dl Zd dlZd dlmZ d dlmZmZmZ d dl	Z
d dlZd dlmc mZ d dlmZmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddl m!Z!  e       rd dl"m#Z#  e       rd dl$m%Z% d dl&m'Z' e ed       G d de                    Z(	 dLdej                   dej                   dej                   fdZ)dededefdZ*dej                   dej                   dej                   fdZ+ G d dejX                        Z-dedede.defdZ/dej                   dej                   de.dej                   fd Z0 G d! d"ejX                        Z1 G d# d$ejX                        Z2 G d% d&ejX                        Z3	 dMd'ejX                  d(ej                   d)ej                   d*ej                   d+eej                      d,e4d-e4fd.Z5 G d/ d0ejX                        Z6 G d1 d2ejX                        Z7dNd3ej                   d4e4d5e8dej                   fd6Z9 G d7 d8ejX                        Z: G d9 d:ejX                        Z; G d; d<ejX                        Z< G d= d>e      Z= G d? d@ej|                        Z? G dA dBejX                        Z@ G dC dDejX                        ZA G dE dFejX                        ZBe G dG dHe             ZC edI       G dJ dKeC             ZDdHdKgZEy)O    N)	dataclass)CallableOptionalUnion)Tensornn   )ACT2FN)ModelOutputis_scipy_availablerequires_backends)GradientCheckpointingLayer)ALL_ATTENTION_FUNCTIONSPreTrainedModel)auto_docstringcan_return_tupleis_accelerate_available   )
EomtConfig)linear_sum_assignment)PartialState)reducea  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   <   e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeej                     ed<   dZeeej                        ed<   dZeeej                        ed<   dZeeej"                        ed	<   y)
"EomtForUniversalSegmentationOutputa+  
    loss (`torch.Tensor`, *optional*):
        The computed loss, returned when labels are present.
    class_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
        query. Note the `+ 1` is needed because we incorporate the null class.
    masks_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
        query.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
        Last hidden states (final feature map) of the last layer.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Self and Cross Attentions weights from transformer decoder.
    patch_offsets (`list[torch.Tensor]`, *optional*):
        list of tuples indicating the image index and start and end positions of patches for semantic segementation.
    Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsets)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r    tupler!   r"   listr        y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/eomt/modeling_eomt.pyr   r   0   s    * )-D(5$$
%,8<(5#4#45<8<(5#4#45<59x 1 1298<M8E%"3"345<59Ju0012926M8D./6r-   r   input_featurespoint_coordinatesreturnc                     |j                         dk(  rd}|j                  d      }t        j                  j                  j
                  | d|z  dz
  fi |}|r|j                  d      }|S )a(  
    A wrapper around `torch.nn.functional.grid_sample` to support 3D point_coordinates tensors.

    Args:
        input_features (`torch.Tensor` of shape (batch_size, channels, height, width)):
            A tensor that contains features map on a height * width grid
        point_coordinates (`torch.Tensor` of shape (batch_size, num_points, 2) or (batch_size, grid_height, grid_width,:
        2)):
            A tensor that contains [0, 1] * [0, 1] normalized point coordinates
        add_dim (`bool`):
            boolean value to keep track of added dimension

    Returns:
        point_features (`torch.Tensor` of shape (batch_size, channels, num_points) or (batch_size, channels,
        height_grid, width_grid):
            A tensor that contains features for points in `point_coordinates`.
    r	   T   g       @      ?)dim	unsqueezer'   r   
functionalgrid_samplesqueeze)r/   r0   add_dimkwargspoint_featuress        r.   sample_pointr=   [   st    ( !#-77: XX((44^SK\E\_bEbmflmN'//2r-   inputslabelsc                    | j                         j                  d      } dt        j                  | |j                        z  }| j                  d      dddf   |j                  d      dddf   z   }d|dz   |dz   z  z
  }|S )a  
    A pair wise version of the dice loss, see `dice_loss` for usage.

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        `torch.Tensor`: The computed loss between each pairs.
    r   r3   N)sigmoidflattenr'   matmulTsum)r>   r?   	numeratordenominatorr   s        r.   pair_wise_dice_lossrI   {   s|     ^^%%a(FELL22I**R.D)FJJrN47,CCK	A+/22DKr-   c                 \   | j                   d   }t        j                  d      } || t        j                  |             } || t        j
                  |             }t        j                  ||z  |j                        }t        j                  ||z  d|z
  j                        }||z   }|S )a  
    A pair wise version of the cross entropy loss, see `sigmoid_cross_entropy_loss` for usage.

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        loss (`torch.Tensor`): The computed loss between each pairs.
    r   none	reduction)shaper   BCEWithLogitsLossr'   	ones_like
zeros_likerD   rE   )	r>   r?   height_and_width	criterioncross_entropy_loss_poscross_entropy_loss_negloss_posloss_negr   s	            r.   $pair_wise_sigmoid_cross_entropy_lossrX      s     ||A$$v6I&vuv/FG&vu/?/?/GH||25EEvxxPH||25EEF
~~VHhDKr-   c                        e Zd ZdZ	 ddedededef fdZ ej                         dej                  dej                  d	ej                  d
ej                  de
ee	      f
d       Z xZS )EomtHungarianMatcheraq  This class computes an assignment between the labels and the predictions of the network.

    For efficiency reasons, the labels don't include the no_object. Because of this, in general, there are more
    predictions than labels. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).
    
cost_class	cost_mask	cost_dice
num_pointsc                     t         |           |dk(  r|dk(  r|dk(  rt        d      || _        || _        || _        || _        y)aH  Creates the matcher

        Params:
            cost_class (`float`, *optional*, defaults to 1.0):
                Relative weight of the classification error in the matching cost.
            cost_mask (`float`, *optional*,  defaults to 1.0):
                This is the relative weight of the focal loss of the binary mask in the matching cost.
            cost_dice (`float`, *optional*, defaults to 1.0):
                This is the relative weight of the dice loss of the binary mask in the matching cost.
            num_points (`int`, *optional*, defaults to 12544):
                No. of points to sample on which the mask loss will be calculated. The same set of K points are
                uniformly sampled for all prediction and ground truth masks to construct the cost matrix for bipartite
                matching.
        r   zAll costs can't be 0N)super__init__
ValueErrorr^   r[   r\   r]   )selfr[   r\   r]   r^   	__class__s        r.   ra   zEomtHungarianMatcher.__init__   sK    " 	?yA~)q.344$$""r-   r   r   mask_labelsclass_labelsr1   c           	         g }|j                   d   }t        |      D ]  }||   j                  d      }||   }	|dd||   f    }
||   j                  |	      }|dddf   }|	dddf   }	t	        j
                  d| j                  d|	j                        }|j                  |j                   d   dd      }t        ||d      j                  d      }|j                  |	j                   d   dd      }t        |	|d      j                  d      }	t        |	|      }t        |	|      }| j                  |z  | j                  |
z  z   | j                  |z  z   }t	        j                   |t	        j"                  d	            }t	        j$                  |t	        j"                  d
            }t	        j&                  |d      }t)        |j+                               }|j-                  |        |D cg c]O  \  }}t	        j.                  |t        j0                        t	        j.                  |t        j0                        fQ }}}|S c c}}w )ao  
        Params:
            masks_queries_logits (`torch.Tensor`):
                A tensor of dim `batch_size, num_queries, num_labels` with the classification logits.
            class_queries_logits (`torch.Tensor`):
                A tensor of dim `batch_size, num_queries, height, width` with the predicted masks.
            class_labels (`torch.Tensor`):
                A tensor of dim `num_target_boxes` (where num_target_boxes is the number of ground-truth objects in the
                target) containing the class labels.
            mask_labels (`torch.Tensor`):
                A tensor of dim `num_target_boxes, height, width` containing the target masks.

        Returns:
            matched_indices (`list[tuple[Tensor]]`): A list of size batch_size, containing tuples of (index_i, index_j)
            where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected labels (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes).
        r   rA   Nr   r3   deviceFalign_cornersg    _Bg    _©dtype)rN   rangesoftmaxtor'   randr^   ri   repeatr=   r9   rX   rI   r\   r[   r]   minimumtensormaximum
nan_to_numr   cpuappend	as_tensorint64)rc   r   r   re   rf   indices
batch_sizei
pred_probs	pred_maskr[   target_maskr0   target_coordinatespred_coordinatesr\   r]   cost_matrixassigned_indicesjmatched_indicess                        r.   forwardzEomtHungarianMatcher.forward   s/   8 *, *//2
z" 	-A-a088<J,Q/I %QQ%788J%a.++I6K%ag.K!!T'*I !&

1dooqIYIY Z!2!9!9+:K:KA:NPQST!U&{4FV[\ddefgK077	8JAqQ$Y0@PUV^^_`aI =YTI+I{CI..94t7SSVZVdVdgpVppK--U\\$5GHK--U\\%5HIK**;:K0EkooFW0XNN+,?	-F ho
_c_`bcU__Qekk2EOOAU[[4YZ
 
 
s   5AI)r4   r4   r4   i 1  )r#   r$   r%   r&   floatintra   r'   no_gradr   r+   r*   r   __classcell__rd   s   @r.   rZ   rZ      s     jo##27#JO#cf#4 U]]_D#llD $llD \\	D
 llD 
eFm	D Dr-   rZ   	num_masksc                     | j                         j                  d      }d||z  j                  d      z  }|j                  d      |j                  d      z   }d|dz   |dz   z  z
  }|j                         |z  }|S )a4  
    Compute the DICE loss, similar to generalized IOU for masks as follows:

    $$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x \cap y }{x \cup y + 1}} $$

    In practice, since `labels` is a binary mask, (only 0s and 1s), dice can be computed as follow

    $$ \mathcal{L}_{\text{dice}(x, y) = 1 - \frac{2 * x * y }{x + y + 1}} $$

    Args:
        inputs (`torch.Tensor`):
            A tensor representing a mask.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).
        num_masks (`int`):
            The number of masks present in the current batch, used for normalization.

    Returns:
        `torch.Tensor`: The computed loss.
    r   r3   rA   )rB   rC   rF   )r>   r?   r   probsrG   rH   r   s          r.   	dice_lossr     sz    , NN$$Q'EUV^((,,I))B-&**R.0K	A+/22D88:	!DKr-   c                     t        j                  d      } || |      }|j                  d      j                         |z  }|S )a|  
    Args:
        inputs (`torch.Tensor`):
            A float tensor of arbitrary shape.
        labels (`torch.Tensor`):
            A tensor with the same shape as inputs. Stores the binary classification labels for each element in inputs
            (0 for the negative class and 1 for the positive class).

    Returns:
        loss (`torch.Tensor`): The computed loss.
    rK   rL   r   )r   rO   meanrF   )r>   r?   r   rS   cross_entropy_lossr   s         r.   sigmoid_cross_entropy_lossr   5  sD     $$v6I"662""1%))+i7DKr-   c                       e Zd Zdedeeef   f fdZdeee	      dee	   fdZ
dee   deeef   fdZd	ed
ee   deej                     deeef   fdZdej                  deej                     deej                     de	deeej                  f   f
dZd Zd Zdej                  dej                  fdZdej                  de	de	dedej                  f
dZ	 ddej                  d	ej                  deej                     d
eej                     deeeej                  f      deeej                  f   fdZd
ej                  dej2                  dej                  fdZ xZS )EomtLossconfigweight_dictc                    t         |           t        | dg       |j                  | _        || _        |j
                  | _        t        j                  | j                  dz         }| j                  |d<   | j                  d|       |j                  | _        |j                  | _        |j                  | _        t        |j                  |j                   |j"                  | j                        | _        y)aH  
        The Eomt Loss. The loss is computed very similar to DETR. The process happens in two steps: 1) we
        compute hungarian assignment between ground truth masks and the outputs of the model 2) we supervise each pair
        of matched ground-truth / prediction (supervise class and mask)

        Args:
            config (`EomtConfig`):
                The configuration for Eomt model also containing loss calculation specific parameters.
            weight_dict (`dict[str, float]`):
                A dictionary of weights to be applied to the different losses.
        scipyr   rA   empty_weight)r[   r]   r\   r^   N)r`   ra   r   
num_labelsr   no_object_weighteos_coefr'   onesregister_buffertrain_num_pointsr^   oversample_ratioimportance_sample_ratiorZ   class_weightdice_weightmask_weightmatcher)rc   r   r   r   rd   s       r.   ra   zEomtLoss.__init__J  s     	$	* ++& //zz$//A"56==R^\: !11 & 7 7'-'E'E$+**((((	
r-   sizesr1   c                 n    |d   }|dd  D ]'  }t        |      D ]  \  }}t        ||   |      ||<    ) |S )Nr   r   )	enumeratemax)rc   r   maxessublistindexitems         r.   _max_by_axiszEomtLoss._max_by_axism  sS    aQRy 	7G(1 7t"5<6e7	7 r-   tensorsc                 `   | j                  |D cg c]  }t        |j                         c}      }t        |      g|z   }|\  }}}}|d   j                  }	|d   j
                  }
t        j                  ||	|
      }t        j                  |||ft        j                  |
      }t        |||      D ]o  \  }}}|d |j                  d   d |j                  d   d |j                  d   f   j                  |       d|d |j                  d   d |j                  d   f<   q ||fS c c}w )Nr   rm   ri   r   r3   F)r   r+   rN   lenrm   ri   r'   zerosr   boolzipcopy_)rc   r   rt   max_sizebatch_shaper|   _heightwidthrm   ri   padded_tensorspadding_maskspadded_tensorpadding_masks                  r.   _pad_images_to_max_in_batchz$EomtLoss._pad_images_to_max_in_batchu  s7   $$w%OVd6<<&8%OP7|nx/'2$
Avu
  ""[fM

J#>ejjY_`36wP]3^ 	G/FM<+FLLO+->v||A->@Q&,,q/@QQRXXY_`AFL*6<<?*,=fll1o,==>	G },, &Ps   D+r   rf   r{   c           	         |}|j                   \  }}}t        j                  | j                        }| j	                  |      }	t        j                  t        ||      D 
cg c]  \  }
\  }}|
|    c}}}
      }t        j                  ||f| j                  t
        j                  |j                        }|||	<   |j                  dd      } |||      }d|i}|S c c}}}
w )a  Compute the losses related to the labels using cross entropy.

        Args:
            class_queries_logits (`torch.Tensor`):
                A tensor of shape `batch_size, num_queries, num_labels`
            class_labels (`list[torch.Tensor]`):
                List of class labels of shape `(labels)`.
            indices (`tuple[np.array])`:
                The indices computed by the Hungarian matcher.

        Returns:
            `dict[str, Tensor]`: A dict of `torch.Tensor` containing the following key:
            - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
        )weight)
fill_valuerm   ri   r   r3   loss_cross_entropy)rN   r   CrossEntropyLossr   $_get_predictions_permutation_indicesr'   catr   fullr   rz   ri   	transpose)rc   r   rf   r{   pred_logitsr|   num_queriesr   rS   idxtargetr   target_classes_otarget_classespred_logits_transposedloss_celossess                    r.   loss_labelszEomtLoss.loss_labels  s    " +%0%6%6"
K''t/@/@A	77@ 99-0w-GHH>66AqVAYH
 %$//]h]o]o
 /s!,!6!6q!!<2NC&0 Is   #C!r   re   r   c                      j                  |      } j                  |      }||   } j                  |      \  }}	||   }|dddf   }|dddf   }t        j                         5   j                  | fd j                   j                   j                        }
t        ||
d      j                  d      }ddd       t        |
d      j                  d      }t        ||      t        |||      d}~~|S # 1 sw Y   ExY w)a  Compute the losses related to the masks using sigmoid_cross_entropy_loss and dice loss.

        Args:
            masks_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, height, width)`.
            mask_labels (`torch.Tensor`):
                List of mask labels of shape `(labels, height, width)`.
            indices (`tuple[np.array])`:
                The indices computed by the Hungarian matcher.
            num_masks (`int)`:
                The number of masks, used for normalization.

        Returns:
            losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing two keys:
            - **loss_mask** -- The loss computed using sigmoid cross entropy loss on the predicted and ground truth.
              masks.
            - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth,
              masks.
        Nc                 &    j                  |       S N)calculate_uncertainty)logitsrc   s    r.   <lambda>z%EomtLoss.loss_masks.<locals>.<lambda>  s    t99&A r-   Frj   r   )	loss_mask	loss_dice)r    _get_targets_permutation_indicesr   r'   r   sample_points_using_uncertaintyr^   r   r   r=   r9   r   r   )rc   r   re   r{   r   src_idxtgt_idx
pred_maskstarget_masksr   r0   point_labelspoint_logitsr   s   `             r.   
loss_maskszEomtLoss.loss_masks  s,   4 ;;GD77@)'2
 ::;Ga#G,  4(
#AtG, ]]_ 		i $ D DA%%,,! (6GW\]eefghL		i $J0AQVW__`ab 4L,PYZ"<yI

 )		i 		is   (AD  D	c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r   r'   r   r   	full_like)rc   r{   r}   srcr   batch_indicespredictions_indicess          r.   r   z-EomtLoss._get_predictions_permutation_indices  sj    		iX_N`"a"a{q(35??3#:"ab#iiW(E#q(EF111 #b(E   #A7A>
c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r   r   )rc   r{   r}   r   tgtr   target_indicess          r.   r   z)EomtLoss._get_targets_permutation_indices  sh    		iX_N`"a"a{q(1c5??3#:"ab#@HQC#@An,, #b#@r   r   c                 2    t        j                  |       }|S )a  
        In Eomt paper, uncertainty is estimated as L1 distance between 0.0 and the logit prediction in 'logits'
        for the foreground class in `classes`.

        Args:
            logits (`torch.Tensor`):
            A tensor of shape (R, 1, ...) for class-specific or class-agnostic, where R is the total number of predicted masks in all images and C is:
            the number of foreground classes. The values are logits.

        Returns:
            scores (`torch.Tensor`): A tensor of shape (R, 1, ...) that contains uncertainty scores with the most
            uncertain locations having the highest uncertainty score.
        )r'   abs)rc   r   uncertainty_scoress      r.   r   zEomtLoss.calculate_uncertainty  s      %yy01!!r-   r^   r   r   c           	         |j                   d   }t        ||z        }t        j                  ||d|j                        }t        ||d      }	 ||	      }
t        ||z        }||z
  }t        j                  |
dddddf   |d      d   }|t        j                  |t        j                  |j                  	      z  }||dddf   z  }|j                  d
d      |j                  d
      ddf   j                  ||d      }|dkD  r:t        j                  |t        j                  ||d|j                        gd      }|S )a  
        This function is meant for sampling points in [0, 1] * [0, 1] coordinate space based on their uncertainty. The
        uncertainty is calculated for each point using the passed `uncertainty function` that takes points logit
        prediction as input.

        Args:
            logits (`float`):
                Logit predictions for P points.
            uncertainty_function:
                A function that takes logit predictions for P points and returns their uncertainties.
            num_points (`int`):
                The number of points P to sample.
            oversample_ratio (`int`):
                Oversampling parameter.
            importance_sample_ratio (`float`):
                Ratio of points that are sampled via importance sampling.

        Returns:
            point_coordinates (`torch.Tensor`):
                Coordinates for P sampled points.
        r   r3   rh   Frj   Nr   )kr5   r   rA   r5   )rN   r   r'   rq   ri   r=   topkarangelongviewr   )rc   r   uncertainty_functionr^   r   r   	num_boxesnum_points_sampledr0   r   point_uncertaintiesnum_uncertain_pointsnum_random_pointsr   shifts                  r.   r   z(EomtLoss.sample_points_using_uncertainty  sI   < LLO	 .>!>? "JJy2DaPVP]P]^#F,=US2<@"#:Z#GH&)==jj,Q1W59MSTUVWX"U\\)5::V\VcVc%dduQW~-222q9#((2,/JOOPY[oqrsq  %		"EJJy:KQW]WdWd$ef! ! r-   auxiliary_predictionsc                    | j                  ||||      }| j                  ||d   j                        }i | j                  ||||      | j	                  |||      }|jt        |      D ]\  \  }	}
|
d   }|
d   }| j                  ||||      }|j                         D ci c]  \  }}| d|	 | }}}|j                  |       ^ |S c c}}w )a  
        This performs the loss computation.

        Args:
            masks_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, height, width)`.
            class_queries_logits (`torch.Tensor`):
                A tensor of shape `(batch_size, num_queries, num_labels)`.
            mask_labels (`torch.Tensor`):
                List of mask labels of shape `(labels, height, width)`.
            class_labels (`list[torch.Tensor]`):
                List of class labels of shape `(labels)`.
            auxiliary_predictions (`dict[str, torch.Tensor]`, *optional*):
                if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], then it contains the logits from
                the inner layers of the EomtMaskedAttentionDecoder.

        Returns:
            losses (`dict[str, Tensor]`): A dict of `torch.Tensor` containing three keys:
            - **loss_cross_entropy** -- The loss computed using cross entropy on the predicted and ground truth labels.
            - **loss_mask** -- The loss computed using sigmoid cross_entropy loss on the predicted and ground truth
              masks.
            - **loss_dice** -- The loss computed using dice loss on the predicted on the predicted and ground truth
              masks.
            if `use_auxiliary_loss` was set to `true` in [`EomtConfig`], the dictionary contains additional
            losses for each auxiliary predictions.
        r   rh   r   r   r   )	r   get_num_masksri   r   r   r   r   itemsupdate)rc   r   r   re   rf   r  r{   r   r   r   aux_outputs	loss_dictkeyvalues                 r.   r   zEomtLoss.forward:  s
   H ,,35I;Xde&&|LO<R<R&S	%
oo2K)T%
3\7K%

 !,$-.C$D ) ['23I'J$'23I'J$ LL)=?SU`bno	EN__EVWzsEuAcU^U2W	Wi()  Xs   "Cri   c                 P   t        |D cg c]  }t        |       c}      }t        j                  |t        j                  |      }d}t               r2t        j                  i k7  rt        |      }t               j                  }t        j                  ||z  d      }|S c c}w )zk
        Computes the average number of target masks across the batch, for normalization purposes.
        r   r   )min)rF   r   r'   ry   r   r   r   _shared_stater   num_processesclamp)rc   rf   ri   classesr   
world_sizes         r.   r  zEomtLoss.get_num_masksq  s     \B'WBC	OOIU[[P	
"$))R/"9-	)^99
KK	J 6A>	 Cs   B#r   )r#   r$   r%   r   dictstrr   ra   r+   r   r   r   r*   r   nparrayr   r'   r   r   r   r   r   r   r   ri   r  r   r   s   @r.   r   r   I  s   !
z !
S%Z8H !
F$tCy/ d3i -4< -E&RX.DY -" $* :>v, QVWYW_W_Q` 	c6k	 D<#ll< %,,'< rxx	<
 < 
c5<<	 <|2-"ELL "U\\ ""5!5! 	5!
 5! "'5! 
5!z DH5#ll5 $ll5 %,,'	5
 5<<(5  (S%,,->(?@5 
c5<<	 5n%,,  QVQ]Q] r-   r   c                   Z     e Zd ZdZ fdZdej                  dej                  fdZ xZS )EomtPatchEmbeddingsz
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
    Transformer.
    c                    t         |           |j                  |j                  }}|j                  |j
                  }}t        |t        j                  j                        r|n||f}t        |t        j                  j                        r|n||f}|d   |d   z  |d   |d   z  z  }|| _        || _        || _        || _
        t        j                  ||||      | _        y )Nr   r   kernel_sizestride)r`   ra   
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesr   Conv2d
projection)rc   r   r  r  r  r  r$  rd   s          r.   ra   zEomtPatchEmbeddings.__init__  s    !'!2!2F4E4EJ
$*$7$79K9Kk#-j+//:R:R#SZZdfpYq
#-j+//:R:R#SZZdfpYq
!!}
15*Q-:VW=:XY$$(&))L+:^hir-   pixel_valuesr1   c                     |j                   d   }|| j                  k7  rt        d| j                   d| d      | j                  |      j	                  d      j                  dd      }|S )Nr   zoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r3   )rN   r  rb   r&  rC   r   )rc   r'  r  
embeddingss       r.   r   zEomtPatchEmbeddings.forward  sz    #))!,4,,,!../yaI  __\2::1=GG1M
r-   )	r#   r$   r%   r&   ra   r'   r   r   r   r   s   @r.   r  r    s)    jELL U\\ r-   r  c                   d     e Zd ZdZdeddf fdZdej                  dej                  fdZ xZ	S )EomtEmbeddingszM
    Construct the CLS token, mask token, position and patch embeddings.
    r   r1   Nc                    t         |           || _        |j                  | _        t	        j
                  t        j                  dd|j                              | _	        t	        j
                  t        j                  d|j                  |j                              | _        t        |      | _        | j                  j                  }t	        j                   |j"                        | _        d|j                  z   | _        t	        j(                  ||j                        | _        | j-                  dt        j.                  |      j1                  d      d       y )Nr   position_ids)r   rA   F)
persistent)r`   ra   r   r  r   	Parameterr'   randnr  	cls_tokenr   num_register_tokensregister_tokensr  patch_embeddingsr$  Dropouthidden_dropout_probdropoutnum_prefix_tokens	Embeddingposition_embeddingsr   r   expand)rc   r   r$  rd   s      r.   ra   zEomtEmbeddings.__init__  s     ++ekk!Q8J8J&KL!||EKK6;U;UW]WiWi,jk 3F ;++77zz&"<"<=!"V%?%?!?#%<<V=O=O#P ^U\\+-F-M-Mg-Vchir-   r'  c                    |j                   \  }}}}| j                  j                  j                  j                  }| j                  |j                  |            }| j                  j                  |dd      }| j                  j                  |dd      }|| j                  | j                        z   }t        j                  |||gd      }| j                  |      }|S )Nrl   rA   r   r   )rN   r5  r&  r   rm   rp   r2  r<  r4  r;  r.  r'   r   r8  )rc   r'  r|   r   target_dtyper*  
cls_tokensr4  s           r.   r   zEomtEmbeddings.forward  s    *00
Aq!,,77>>DD**<???+NO
^^**:r2>
..55j"bI$":":4;L;L"MM
YY
OZHaP
\\*-
r-   )
r#   r$   r%   r&   r   ra   r'   r   r   r   r   s   @r.   r,  r,    s9    jz jd j ELL U\\ r-   r,  modulequeryr	  r
  attention_maskscalingr8  c                    t        j                  ||j                  dd            |z  }|||z   }t        j                  j                  |dt         j                        j                  |j                        }t        j                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )NrA   )r5   rm   )ptrainingr   r3   )r'   rD   r   r   r7   ro   float32rp   rm   r8  rG  
contiguous)
r@  rA  r	  r
  rB  rC  r8  r;   attn_weightsattn_outputs
             r.   eager_attention_forwardrL    s     <<s}}R'<=GL!#n4==((2U]](SVVW\WbWbcL==((6??([L,,|U3K''1-88:K$$r-   c            
            e Zd ZdZ fdZ	 ddej                  deej                     deej                  eej                     f   fdZ	 xZ
S )EomtAttentionz=Multi-headed attention from 'Attention Is All You Need' paperc                    t         |           || _        |j                  | _        |j
                  | _        | j                  | j                  z  | _        | j                  | j                  z  | j                  k7  r&t        d| j                   d| j                   d      | j                  dz  | _	        |j                  | _        d| _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        t        j                  | j                  | j                        | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      F)r`   ra   r   r  	embed_dimnum_attention_heads	num_headshead_dimrb   scaleattention_dropoutr8  	is_causalr   Lineark_projv_projq_projout_projrc   r   rd   s     r.   ra   zEomtAttention.__init__  s   ++33$..8==4>>)T^^;MdnnM] ^NN#2'  ]]D(
//ii?ii?ii?		$..$..Ar-   r    rB  r1   c           
      :   |j                   \  }}}| j                  |      }| j                  |      }| j                  |      }	|j	                  ||| j
                  | j                        j                  dd      }|j	                  ||| j
                  | j                        j                  dd      }|	j	                  ||| j
                  | j                        j                  dd      }	t        }
| j                  j                  dk7  rt        | j                  j                     }
 |
| |||	|| j                  | j                  | j                  sdn| j                        \  }}|j!                  |||      j#                         }| j%                  |      }||fS )z#Input shape: Batch x Time x Channelr   r3   eager        )rV  rC  r8  )rN   rZ  rX  rY  r   rR  rS  r   rL  r   _attn_implementationr   rV  rT  rG  r8  reshaperI  r[  )rc   r    rB  r;   r|   
seq_lengthrP  querieskeysvaluesattention_interfacerK  rJ  s                r.   r   zEomtAttention.forward  sa    -:,?,?)
J	++m,{{=)]+,,z:t~~t}}U__`acdeyyZOYYZ[]^_ZT^^T]]S]]^_abc(?;;++w6"9$++:Z:Z"[$7nnJJ#}}C$,,	%
!\ "))*j)LWWYmmK0L((r-   r   )r#   r$   r%   r&   ra   r'   r   r   r*   r   r   r   s   @r.   rN  rN    sV    GB. 26$)||$) !.$)
 
u||Xell33	4$)r-   rN  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtLayerScaler1   c                     t         |           t        j                  |j                  t        j                  |j                        z        | _        y r   )	r`   ra   r   r0  layerscale_valuer'   r   r  lambda1r\  s     r.   ra   zEomtLayerScale.__init__  s8    ||F$;$;ejjI[I[>\$\]r-   hidden_statec                      || j                   z  S r   )rk  rc   rl  s     r.   r   zEomtLayerScale.forward!  s    dll**r-   r1   Nr#   r$   r%   ra   r'   r   r   r   r   s   @r.   rh  rh    s$    ^+ELL +U\\ +r-   rh  input	drop_probrG  c                    |dk(  s|s| S d|z
  }| j                   d   fd| j                  dz
  z  z   }|t        j                  || j                  | j
                        z   }|j                          | j                  |      |z  }|S )aF  
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    r_  r   r   )r   r   )rN   ndimr'   rq   rm   ri   floor_div)rq  rr  rG  	keep_probrN   random_tensoroutputs          r.   	drop_pathrz  %  s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr-   c                   x     e Zd ZdZd	dee   ddf fdZdej                  dej                  fdZ	de
fdZ xZS )
EomtDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nrr  r1   c                 0    t         |           || _        y r   )r`   ra   rr  )rc   rr  rd   s     r.   ra   zEomtDropPath.__init__<  s    "r-   r    c                 D    t        || j                  | j                        S r   )rz  rr  rG  rc   r    s     r.   r   zEomtDropPath.forward@  s    FFr-   c                      d| j                    S )Nzp=)rr  rc   s    r.   
extra_reprzEomtDropPath.extra_reprC  s    DNN#$$r-   r   )r#   r$   r%   r&   r   r   ra   r'   r   r   r  r  r   r   s   @r.   r|  r|  9  sG    b#(5/ #T #GU\\ Gell G%C %r-   r|  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtMLPr1   c                 ~   t         |           |j                  x}}t        |j                  |j                  z        }t        j                  ||d      | _        t        |j                  t              rt        |j                     | _        n|j                  | _        t        j                  ||d      | _        y )NTbias)r`   ra   r  r   	mlp_ratior   rW  fc1r   
hidden_actr  r
   
activationfc2rc   r   in_featuresout_featureshidden_featuresrd   s        r.   ra   zEomtMLP.__init__H  s    %+%7%77lf0063C3CCD99[/Ef''-$V%6%67DO$//DO99_lFr-   rl  c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r  r  r  rn  s     r.   r   zEomtMLP.forwardS  s2    xx-|4xx-r-   ro  rp  r   s   @r.   r  r  G  s$    	GELL U\\ r-   r  c                   X     e Zd Zd fdZdej
                  dej
                  fdZ xZS )EomtSwiGLUFFNr1   c                 0   t         |           |j                  x}}t        |j                  |j                  z        }t        |dz  dz        dz   dz  dz  }t        j                  |d|z  d      | _        t        j                  ||d      | _        y )Nr3   r	         Tr  )	r`   ra   r  r   r  r   rW  
weights_inweights_outr  s        r.   ra   zEomtSwiGLUFFN.__init__[  s    %+%7%77lf0063C3CCD2Q67!;AAE))K_1D4P99_lNr-   rl  c                     | j                  |      }|j                  dd      \  }}t        j                  j	                  |      |z  }| j                  |      S )Nr3   rA   r   )r  chunkr   r7   silur  )rc   rl  x1x2hiddens        r.   r   zEomtSwiGLUFFN.forwardd  sS    |4##A2#.B##B'",''r-   ro  rp  r   s   @r.   r  r  Z  s$    O(ELL (U\\ (r-   r  c                        e Zd ZdZdeddf fdZ	 	 d
dej                  deej                     de	de
eej                  ej                  f   eej                     f   fd	Z xZS )	EomtLayerzCThis corresponds to the Block class in the original implementation.r   r1   Nc                    t         |           t        j                  |j                  |j
                        | _        t        |      | _        t        |      | _
        |j                  dkD  rt        |j                        nt        j                         | _        t        j                  |j                  |j
                        | _        |j                   rt#        |      | _        nt'        |      | _        t        |      | _        y )Nepsr_  )r`   ra   r   	LayerNormr  layer_norm_epsnorm1rN  	attentionrh  layer_scale1drop_path_rater|  Identityrz  norm2use_swiglu_ffnr  mlpr  layer_scale2r\  s     r.   ra   zEomtLayer.__init__n  s    \\&"4"4&:O:OP
&v.*62@F@U@UX[@[f&;&;<acalalan\\&"4"4&:O:OP
  $V,DHvDH*62r-   r    	head_maskoutput_attentionsc                 D   | j                  | j                  |      ||      }|d   }| j                  |      }|dd  }| j                  |      |z   }| j	                  |      }| j                  |      }| j                  |      }| j                  |      |z   }|f|z   }|S )N)r  r   r   )r  r  r  rz  r  r  r  )rc   r    r  r  self_attention_outputsattention_outputoutputslayer_outputs           r.   r   zEomtLayer.forward~  s     "&JJ}%/ "0 "

 2!4,,-=>(, '78=H zz-0xx-((6 ~~l3mC/G+r-   )NF)r#   r$   r%   r&   r   ra   r'   r   r   r   r   r*   r   r   r   s   @r.   r  r  k  s~    M3z 3d 3& -1"'	|| ELL)  	
 
uU\\5<</0%2EE	Fr-   r  c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )EomtLayerNorm2dc                 *    t         |   |||       y )N)r  elementwise_affine)r`   ra   )rc   r  r  affinerd   s       r.   ra   zEomtLayerNorm2d.__init__  s    36Jr-   rl  r1   c                     |j                  dddd      }t        j                  || j                  | j                  | j
                  | j                        }|j                  dddd      }|S )Nr   r3   r	   r   )permuteF
layer_normnormalized_shaper   r  r  rn  s     r.   r   zEomtLayerNorm2d.forward  sb    #++Aq!Q7||L$2G2GVZV_V_aeaiaij#++Aq!Q7r-   )gư>Trp  r   s   @r.   r  r    s$    KELL U\\ r-   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZ	S )EomtScaleLayerr   c                    t         |           |j                  }t        j                  ||dd      | _        t        |j                     | _        t        j                  ||dd|d      | _
        t        |      | _        y )Nr3   r  r	   r   F)r  paddinggroupsr  )r`   ra   r  r   ConvTranspose2dconv1r
   r  r  r%  conv2r  layernorm2drc   r   r  rd   s      r.   ra   zEomtScaleLayer.__init__  su    ((''[aXYZ
 !2!23YY

 +;7r-   r    r1   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r   )r  r  r  r  r  s     r.   r   zEomtScaleLayer.forward  sB    

=16

=1((7r-   )
r#   r$   r%   r   ra   r'   rt   r   r   r   r   s   @r.   r  r    s*    8z 8 U\\ ell r-   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtScaleBlockr   c                     t         |           |j                  | _        t	        j
                  t        | j                        D cg c]  }t        |       c}      | _        y c c}w r   )	r`   ra   num_upscale_blocks
num_blocksr   
ModuleListrn   r  blockrc   r   r   rd   s      r.   ra   zEomtScaleBlock.__init__  sG     33]]E$//DZ#[qN6$:#[\
#[s   A&r    r1   c                 8    | j                   D ]
  } ||      } |S r   )r  )rc   r    r  s      r.   r   zEomtScaleBlock.forward  s%    ZZ 	1E!-0M	1r-   	r#   r$   r%   r   ra   r'   r   r   r   r   s   @r.   r  r    s,    ]z ]
U\\ ell r-   r  c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtMaskHeadr   c                    t         |           |j                  }t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        t        |j                     | _
        y r   )r`   ra   r  r   rW  r  r  fc3r
   r  r  r  s      r.   ra   zEomtMaskHead.__init__  sa    ((99[+699[+699[+6 !2!23r-   r    r1   c                     | j                  | j                  |            }| j                  | j                  |            }| j                  |      }|S r   )r  r  r  r  r  s     r.   r   zEomtMaskHead.forward  sD    (?@(?@/r-   r  r   s   @r.   r  r    s*    4z 4U\\ ell r-   r  c                   Z    e Zd ZU dZeed<   dZdZdZdgZ	dZ
dZdej                  d	d
fdZy
)EomtPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r   eomtr'  Fr  Tr@  r1   Nc                    | j                   j                  }t        |t        j                  t        j
                  t        j                  f      rt        j                  j                  |j                  t        j                  d             |j                  xt        j                  j                  |j                        \  }}|dkD  rdt        j                  |      z  nd}t        j                  j                  |j                  | |       y y t        |t        j                        rJ|j                  j                   j#                  d       |j                  j                   j%                          y t        |t        j&                        rf|j                  j                   j)                  dd       |j*                  2|j                  j                   |j*                     j%                          y y t        |t,              rGt/        |d      r:|j0                  j                   j#                  | j                   j2                         y y t        |t4              rt        j                  j7                  |j8                  j                   j;                  t<        j>                        d|      j;                  |j8                  j@                        |j8                  _        |jB                  j                   j%                          y y )	N   )ar   r   r4   r_  )r   stdrk  )"r   initializer_ranger   r   rW  r%  r  initkaiming_uniform_r   mathsqrtr  _calculate_fan_in_and_fan_outuniform_r  datafill_zero_r:  normal_padding_idxrh  hasattrrk  rj  r,  trunc_normal_r2  rp   r'   rH  rm   r4  )rc   r@  r  fan_inr   bounds         r.   _init_weightsz!EomtPreTrainedModel._init_weights  s   kk++fryy"))R5G5GHIGG$$V]]diil$C{{&GGAA&--P	17!DIIf--  ufe< ' -MM$$S)KK""$-MM&&CQ&7!!-""6#5#56<<> ./vy)##))$++*F*FG */$&GG$9$9  %%((7cs %: %b!!''( ! ""''--/	 0r-   )r#   r$   r%   r&   r   r)   base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attnr   Moduler  r,   r-   r.   r  r    sJ    
 $O&+#$N0BII 0$ 0r-   r  zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                   $    e Zd ZdZdeddf fdZdededed	ed
eeef   deeef   fdZ	deeef   defdZ
ee	 	 	 	 	 ddedeee      d	eee      dee   dee   deee      defd              Zd Zdej                  fdZed        Z xZS )EomtForUniversalSegmentationr'  r   r1   Nc                    t         |   |       || _        |j                  | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        |       c}      | _        t%        |      | _        t)        |      | _        t        j,                  |j                  |j.                  dz         | _        |j2                  |j4                  z  |j2                  |j4                  z  f| _        |j8                  |j:                  |j<                  d| _        tA        || j>                        | _!        | jE                  dtG        jH                  |jJ                               | jM                          y c c}w )Nr  r   )r   r   r   )r   r   attn_mask_probs)'r`   ra   r   num_hidden_layersr,  r*  r   r  r  r  	layernormr:  r   rA  r  rn   r  layersr  upscale_blockr  	mask_headrW  r   class_predictorr  r  	grid_sizer   r   r   r   r   rS   r   r'   r   r  	post_initr  s      r.   ra   z%EomtForUniversalSegmentation.__init__  sp    !'!9!9(0f&8&8f>S>ST\\&"4"4f6H6HI
mmfF^F^@_$`1Yv%6$`a+F3%f-!yy););V=N=NQR=RS ++v/@/@@&BSBSW]WhWhBhi"("5"5++++.
 "T=M=MN.

6;L;L0MN% %as   >G*r   r   re   rf   r  c                     | j                  |||||      }| j                  j                         D ]'  \  }}|j                         D ]  \  }	}
||	v s|
|z  }
 ) |S )Nr   r   re   rf   r  )rS   r   r  )rc   r   r   re   rf   r  r  r	  r   loss_keyr   s              r.   get_loss_dictz*EomtForUniversalSegmentation.get_loss_dict+  s|     (,~~!5!5#%"7 (6 (
	  ++113 	#KC"+//"3 #$(?FND#	#
 r-   r  c                 4    t        |j                               S r   )rF   re  )rc   r  s     r.   get_lossz%EomtForUniversalSegmentation.get_lossC  s    9##%&&r-   output_hidden_statesr  r"   c           	         ||n| j                   j                  }||n| j                   j                  }|rdnd}|rdnd}d\  }	}
d}|t        d      | j	                  |      }t        | j                        D ]  \  }}|r||fz  }|| j                  | j                   j                  z
  k(  rp| j                  j                  dddddf   j                  |j                  d   dd      j                  |j                        }t        j                   ||fd      }|| j                  | j                   j                  z
  k\  r| j"                  s7| j$                  || j                  z
  | j                   j                  z      dkD  r| j'                  |      }| j)                  |      \  }}|	|fz  }	|
|fz  }
t        j*                  |j                  d   |j                  d   |j                  d   |j                  t        j,                  	      }t/        j0                  || j2                  d
      }|j5                  |j7                  d      |j7                  d      d      }| j                   j8                  }|| j                  j:                  z   }|dkD  |ddd||df<   | j=                  || j$                  || j                  z
  | j                   j                  z      |||j                        }|ddddf   j                  d| j                   j>                  dd      }|jA                         jC                  | d      } ||||      }|d   }|s||d   fz  } | j'                  |      }|r||fz  }| j)                  |      \  }}|	|fz  }	|
|fz  }
d}|B|@d}tE        |	|
      D ]/  \  }}| jG                  ||||d      }|| jI                  |      z  }1 tK        |||||||      S )ah  
        mask_labels (`list[torch.Tensor]`, *optional*):
            list of mask labels of shape `(num_labels, height, width)` to be fed to a model
        class_labels (`list[torch.LongTensor]`, *optional*):
            list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
            labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
        patch_offsets (`list[torch.Tensor]`, *optional*):
            list of tuples indicating the image index and start and end positions of patches for semantic segementation.
        Nr,   )r,   r,   z You have to specify pixel_valuesr   rA   r   r   )ri   rm   bilinear)sizemode)probnum_query_tokensencoder_start_tokensri   .g    er_  r  )r   r   r   r   r    r!   r"   )&r   r  r  rb   r*  r   r  r  r  rA  r   r<  rN   rp   ri   r'   r   rG  r  r  predictr   r   r  interpolater  r   r  r   r9  _disable_attention_maskrQ  r   masked_fillr   r  r
  r   )rc   r'  re   rf   r  r  r"   all_hidden_statesall_attentionsmasks_queries_logits_per_layerclass_queries_logits_per_layerrB  r    r   layer_modulerA  norm_hidden_statesr   r   interpolated_logitsr  r  layer_outputssequence_outputr   r  s                             r.   r   z$EomtForUniversalSegmentation.forwardF  sV   * %9$D $++JjJj 	 2C1N-TXT_T_TqTq"6BD0dIOF&(F?@@5!*4;;!7 5	6C#!m%55!d,,t{{/E/EEE

))$1*5<<]=P=PQR=SUWY[\__`m`t`tu %		5-*@a Hd,,t{{/E/EEE!5!5cD<R<R6RUYU`U`UkUk6k!lop!p%)^^M%B"=A\\J\=]:$&:.3G2II..3G2II.!&!''*!''*!''*(//**" '(mm4Ht~~dn&o#&9&>&>',,Q/1D1I1I!1Lb'# $(;;#:#: '7$//:[:['[$ ObdeNeq"3#3"35I5JJK "&!=!="--cD4J4J.JT[[McMc.cd%5)=)00 "> " "04!=!D!DRIhIhjlnp!q!/!5!5!7!C!C^OUY!Z(HYZM)!,M =#3"55k5	6n ..7/!3359\\/5R22&+?*AA&&+?*AA&"|'?D>A.0N? 
1:$&: !..)=)= +!-*. / 	 i00
1 2!5!5-+%'
 	
r-   c                 .    | j                   j                  S r   )r*  r5  r  s    r.   get_input_embeddingsz1EomtForUniversalSegmentation.get_input_embeddings  s    ///r-   r   c                    |d d d | j                   j                  d d f   }| j                  |      }|d d | j                   j                  | j                  j                  z   d d d f   }|j                  dd      } |j                  |j                  d   dg| j                   }| j                  |      }| j                  |      }t        j                  d||      }||fS )Nr   r3   r   rA   zbqc, bchw -> bqhw)r   r   r  r*  r9  r   ra  rN   r  r  r   r'   einsum)rc   r   query_tokensclass_logitsprefix_tokensmask_logitss         r.   r  z$EomtForUniversalSegmentation.predict  s    a!:4;;#:#:!:A=>++L9q$++"9"9DOO<]<]"]"_abbc%//15---m.A.A!.DbZ4>>Z~~l3**=9ll#6mTL((r-   c                     |dk  r9t        j                  | j                  d   ||      |kD  }d| d d d ||d f   |<   | S )Nr   r   rh   )r'   rq   rN   )	attn_maskr  r  r  ri   random_queriess         r.   r  z4EomtForUniversalSegmentation._disable_attention_mask  sW    !8"ZZ	(:<LU[\_ccN VWIa***,@,AAB>Rr-   )NNNNN)r#   r$   r%   r  r   ra   r   r  r  r  r
  r   r   r   r+   r   r   r   r!  r'   r  staticmethodr  r   r   s   @r.   r  r    sQ    %Oz d 8$ % 	
   $CK0 
c6k	0'$sF{"3 ' '  /3/3/3,004x
x
 d6l+x
 tF|,	x

 'tnx
 $D>x
  V-x
 
,x
  x
t0)ell )   r-   r  )F)r_  )r_  F)Fcollections.abcr!  r  dataclassesr   typingr   r   r   numpyr  r'   torch.nn.functionalr   r7   r  r   activationsr
   
file_utilsr   r   r   modeling_layersr   modeling_utilsr   r   utilsr   r   r   configuration_eomtr   scipy.optimizer   
accelerater   accelerate.utilsr   r   r=   rI   rX   r  rZ   r   r   r   r   r  r,  r   rL  rN  rh  r   rz  r|  r  r  r  r  r  r  r  r  r  r  __all__r,   r-   r.   <module>r;     s  ,   ! , ,      ! L L 9 F N N * 4'' 	7 7	 7B LQLL5:\\
\\@  6 , u|| X]XdXd 8g299 gTf f   <u|| U\\ VY ^c^j^j (uryy up	")) B"RYY "X %II%<<% 
% <<	%
 U\\*% % %.;)BII ;)|+RYY +U\\ e T V[VbVb (%299 %bii &(BII ("0* 0fbll RYY 2	RYY 	299 " $0/ $0 $0N 
R#6 R
Rj !"@
Ar-   