
    rh              
          d Z ddlZddlmZmZ ddlZddlmZm	Z	m
Z
 ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZmZm Z   e jB                  e"      Z# e       rddl$Z$ddl%m&c m'Z( 	 	 dd	d
dee)e*e*f      dee*   fdZ+dde,e*e*f   fdZ-d Z.ddZ/	 	 	 dde0de0dee,e*e*f      fdZ1de)e2e*f   de,e*e*f   fdZ3 G d de      Z4dgZ5y)zImage processor class for EoMT.    N)OptionalUnion   )BaseImageProcessorBatchFeatureget_size_dict)PaddingModepadresize)
ChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatmake_flat_list_of_imagesmake_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD
TensorTypefilter_out_non_signature_kwargsis_torch_availableloggingsegmentation_mapz
np.ndarrayinstance_id_to_semantic_idignore_indexc                 4   |t        j                  | dk(  || dz
        } t        j                  |       }||||k7     }|D cg c]  }| |k(  	 }}|rt        j                  |d      }n#t        j                  dg| j
                        }|Et        j                  |j
                  d         }|D ]  }|||dz   n|   }||dz
  n||||k(  <    n|}|j                  t         j                        |j                  t         j                        fS c c}w )Nr      )axis)	npwhereuniquestackzerosshapeastypefloat32int64)	r   r   r   
all_labelsibinary_maskslabelslabelclass_ids	            /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/eomt/image_processing_eomt.py(convert_segmentation_map_to_binary_masksr2   :   s7   
 88$4$9<IY\]I]^ +,J 
l :;
 6@@%*@L@ xx15xx <%5%;%; <= "-***1-. 	aE1|?W%!)]bcH:F:R(Q,X`F:&'	a rzz*FMM"((,CCC% As   Dreturnc                    | \  }}d}|St        t        ||f            }t        t        ||f            }||z  |z  |kD  r||z  |z  }t        t	        |            }||k  r||k(  s
||k  r||k(  r||}	}||	fS ||k  r0|}	||t	        ||z  |z        }||	fS t	        ||z  |z        }||	fS |}||t	        ||z  |z        }	||	fS t	        ||z  |z        }	||	fS )aC  
    Computes the output image size given the input image size and the desired output size.

    Args:
        image_size (`tuple[int, int]`):
            The input image size.
        size (`int`):
            The desired output size.
        max_size (`int`, *optional*):
            The maximum allowed output size.
    N)floatminmaxintround)

image_sizesizemax_sizeheightwidthraw_sizemin_original_sizemax_original_sizeohows
             r1   get_size_with_aspect_ratiorD   _   sF    MFEH!#vuo"67!#vuo"670047(B"336GGHuX'D%FdNETMB 8O 
H$8x&(501B 8O tf}u,-B 8O H$8x%'&01B 8O te|f,-B8O    c                     | j                   d   |j                   d   cxk(  r|j                   d   k(  st        d       t        d      |j                  |      ||kD  z  }| |   ||   ||   fS )a	  
    Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
    `labels`.

    Args:
        masks (`torch.Tensor`):
            A tensor of shape `(num_queries, height, width)`.
        scores (`torch.Tensor`):
            A tensor of shape `(num_queries)`.
        labels (`torch.Tensor`):
            A tensor of shape `(num_queries)`.
        object_mask_threshold (`float`):
            A number between 0 and 1 used to binarize the masks.
    Raises:
        `ValueError`: Raised when the first dimension doesn't match in all input tensors.
    Returns:
        `tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
        < `object_mask_threshold`.
    r   z1mask, scores and labels must have the same shape!)r'   
ValueErrorne)masksscoresr.   object_mask_threshold
num_labelsto_keeps         r1   remove_low_and_no_objectsrN      sy    ( KKNfll1o@a@LMM ALMMii
#v0E'EFG>6'?F7O;;rE   c                     | |k(  }|j                         }||   |k\  }|j                         }||z  }	|	j                         }
|dkD  xr |dkD  xr |
dkD  }|r||z  }|j                         |kD  sd}||	fS )Nr   F)sumitem)mask_labels
mask_probskmask_thresholdoverlap_mask_area_thresholdmask_kmask_k_areaoriginal_maskoriginal_area
final_maskfinal_mask_areamask_exists
area_ratios                r1   check_segment_validityr_      s    AF**,K qM^3M!%%'M-'J nn&O/Oma&7OOa<OK =0
 #>>K
""rE   rU   rV   target_sizec                 b   || j                   d   n|d   }|| j                   d   n|d   }t        j                  ||ft        j                  | j                        dz
  }	g }
| j                         } |d d d d f   | z  j                  d      }d}i }t        |j                   d         D ]~  }||   j                         }t        || |||      \  }}|s+|r||v r||v r	||   |	|<   >|||<   ||	|<   t        ||   j                         d      }|
j                  |||d       |dz  } |	|
fS )Nr    r      )dtypedevice   idlabel_idscore)r'   torchr&   longrd   sigmoidargmaxrangerQ   r_   r9   append)rS   pred_scorespred_labelsstuff_classesrU   rV   r`   r=   r>   segmentationsegmentsrR   current_segment_idstuff_memory_listrT   
pred_classr]   r[   segment_scores                      r1   compute_segmentsry      s    %0$7Za [^F#.#6JQKNE;;ejjIZIZ[^__LH ##%Jq$}-
:BB1EK (*;$$Q'(   ^((*
 #9Q8S#
Z Z=8..+<Z+HZ(0B!*-#5Z k!n113Q7(&&	
 	a7 8 !!rE   	size_dictc                 >    | d   }| j                  d      xs |}||fS )z.Returns the height and width from a size dict.shortest_edgelongest_edge)get)rz   target_heighttarget_widths      r1   get_target_sizer      s*    o.M==0AML,&&rE   c            '           e Zd ZdZdgZddej                  dddddddddfdedee	e
ef      d	ed
edededededeeeee   f      deeeee   f      dee   dee   f fdZej                  ddfdej"                  de	d	edeee
ef      dej"                  f
dZdede	dedeeef   fdZdede	dej"                  fdZ	 	 	 	 	 	 	 	 	 	 	 	 d2dedee   dee	e
ef      d	edee   dee   d
ee   dee   dee   deeeee   f      deeeee   f      deee
ef      deee
ef      dej"                  fdZ	 	 	 	 	 	 d3dedee   dee   dee	e
ef      d	edee
ef   deee
ef      dej"                  fdZ e       ddddddddddddddej6                  dfded eeee	eef      e	eef   f      d!ee	eef      dee   dee   dee	e
ef      d	ed
ee   dee   dee   dee   deeeee   f      deeeee   f      dee   d"eee
ef      dee
ef   deee
ef      def$d#       Z	 	 	 	 	 d4d$ee   d ed!eeee	eef      e	eef   f      dee   d"eee
ef      deee
ef      fd%Zd&e jB                  d'eeeeef      d(eeeef      de	e
ef   dee jB                     f
d)Z"d&e jB                  d(eeeef      de	e
ef   dee jB                     fd*Z#	 d5d(eeeef      dee	e
ef      dej"                  fd+Z$	 	 	 	 	 d6d(eeeef      d,ed-ed.ed/eee      dee	e
ef      fd0Z% e       	 	 d7d(eeeef      d,edee	e
ef      fd1       Z& xZ'S )8EomtImageProcessoraY  
    Constructs a EoMT image processor. The image processor can be used to prepare image(s) and optional targets
    for the model.

    This image processor inherits from [`BaseImageProcessor`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the input to a certain `size`.
        size (`int`, *optional*, defaults to 640):
            Resize the input to the given size. Only has an effect if `do_resize` is set to `True`. If size is a
            sequence like `(width, height)`, output size will be matched to this. If size is an int, smaller edge of
            the image will be matched to this number. i.e, if `height > width`, then image will be rescaled to `(size *
            height / width, size)`.
        resample (`int`, *optional*, defaults to `Resampling.BILINEAR`):
            An optional resampling filter. This can be one of `PIL.Image.Resampling.NEAREST`,
            `PIL.Image.Resampling.BOX`, `PIL.Image.Resampling.BILINEAR`, `PIL.Image.Resampling.HAMMING`,
            `PIL.Image.Resampling.BICUBIC` or `PIL.Image.Resampling.LANCZOS`. Only has an effect if `do_resize` is set
            to `True`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the input to a certain `scale`.
        rescale_factor (`float`, *optional*, defaults to `1/ 255`):
            Rescale the input by the given factor. Only has an effect if `do_rescale` is set to `True`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether or not to normalize the input with mean and standard deviation.
        do_split_image (`bool`, *optional*, defaults to `False`):
            Whether to split the input images into overlapping patches for semantic segmentation. If set to `True`, the
            input images will be split into patches of size `size["shortest_edge"]` with an overlap between patches.
            Otherwise, the input images will be padded to the target size.
        do_pad (`bool`, *optional*, defaults to `False`):
            Whether to pad the image. If `True`, will pad the patch dimension of the images in the batch to the largest
            number of patches in the batch. Padding will be applied to the bottom and right with zeros.
        image_mean (`int`, *optional*, defaults to `[0.485, 0.456, 0.406]`):
            The sequence of means for each channel, to be used when normalizing images. Defaults to the ImageNet mean.
        image_std (`int`, *optional*, defaults to `[0.229, 0.224, 0.225]`):
            The sequence of standard deviations for each channel, to be used when normalizing images. Defaults to the
            ImageNet std.
        ignore_index (`int`, *optional*):
            Label to be assigned to background pixels in segmentation maps. If provided, segmentation map pixels
            denoted with 0 (background) will be replaced with `ignore_index`.
        num_labels (`int`, *optional*):
            The number of labels in the segmentation map.
    pixel_valuesTNgp?F	do_resizer;   resample
do_rescalerescale_factordo_normalizedo_split_imagedo_pad
image_mean	image_stdr   rL   c                    t        |   di | ||nddd}t        |d      }|| _        || _        || _        || _        || _        || _        || _	        || _
        |	|	nt        | _        |
|
nt        | _        || _        || _        y )Ni  )r|   r}   Fdefault_to_square )super__init__r   r   r;   r   r   r   r   r   r   r   r   r   r   r   rL   )selfr   r;   r   r   r   r   r   r   r   r   r   rL   kwargs	__class__s                 r1   r   zEomtImageProcessor.__init__'  s      	"6"'tsTW-XTU;"	 $,(,(2(>*DY&/&;AU($rE   imageinput_data_formatr3   c           
      f    t        |      }t        ||d   |d         }t        d|||||dd|}|S )a  
        Resize an image. The shortest edge of the image is resized to size["shortest_edge"], with the longest edge
        resized to keep the input aspect ratio.

        Args:
            image (`np.ndarray`):
                Image to resize.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                Resampling filter to use when resiizing the image.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format of the image. If not provided, it will be the same as the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.
        r|   r}   T)r   r;   r   data_formatr   return_numpyr   )r   rD   r   )	r   r   r;   r   r   r   r   r:   output_sizes	            r1   r   zEomtImageProcessor.resizeI  sZ    . $E*
0T/=RTXYgThi 
#/
 
 rE   image_indexc                    g g }}t        |      }|d   }t        |      }t        j                  ||z        }	|	|z  |z
  }
|	dkD  r|
|	dz
  z  nd}t	        |	      D ]e  }t        |||z
  z        }||z   }|d   |d   kD  r|dd||ddf   }n|dddd||f   }|j                  |       |j                  |||g       g ||fS )zCSlices an image into overlapping patches for semantic segmentation.r|   r    r   N)r   r7   mathceilrn   r8   ro   )r   r   r;   r   patchespatch_offsetsr:   
patch_sizelonger_sidenum_patchestotal_overlapoverlap_per_patchr,   startendpatchs                   r1   _split_imagezEomtImageProcessor._split_imageo  s     "$R#E*
/*
*oiij 89#j0;>ALqM[1_=VW{# 
	<AZ*;;<=E*$C!}z!},asAo.aE#Io.NN5!  +uc!:;
	< %%rE   c                     t        |      \  }}t        |      \  }}t        d||z
        }t        d||z
        }d|fd|ff}	t        ||	t        j
                  d      }
|
S )z5Pads the image to the target size using zero padding.r   g        )r   paddingmodeconstant_values)r   r   r7   r
   r	   CONSTANT)r   r   r;   r=   r>   r   r   pad_hpad_wr   padded_images              r1   _padzEomtImageProcessor._pad  sr    &u-&5d&;#|A}v-.A|e+,u:5z* k>R>RdghrE   imagesr   c           
      ,   |D cg c]  }t        |       }}|r#|D cg c]  }| j                  |||||       }}g g }}|rMt        |      D ]=  \  }}| j                  |||      \  }}|j	                  |       |j	                  |       ? |}|r|D cg c]  }| j                  ||       }}|r!|D cg c]  }| j                  |||       }}|	r"|D cg c]  }| j                  ||
||       }}||fS c c}w c c}w c c}w c c}w c c}w )zPreprocesses a batch of images.)r;   r   r   r   )scaler   )meanstdr   )r   r   	enumerater   extendr   rescale	normalize)r   r   r   r;   r   r   r   r   r   r   r   r   r   r   r   processed_imagesr   idximgr   offsetss                        r1   _preprocess_imagesz%EomtImageProcessor._preprocess_images  sn   " 6<<E.'<< $	  % +&7  	F 	 +-b-%f- .S#'#4#4S$#D  ''0$$W-.
 &F6<=sdiiT*=F=ntugjdll3nPalbuFu $  #!&7	  F  }$$S =	, > vs   C=DD8DDr   c                    |j                   dk(  rd}|d   }t        j                  }nd}|t        |      }|r| j	                  ||||      }|r| j                  ||      }|r|j                  d      }t        j                  |      S )zPreprocesses a single mask.rb   TN.F)r;   r   r   r   )	ndimr   FIRSTr   r   r   squeezerj   
from_numpy)	r   r   r   r   r;   r   r   r   added_channel_dims	            r1   _preprocess_maskz#EomtImageProcessor._preprocess_mask  s       A% $/	: 0 6 6 % ($BCS$T!#{{ !'	  +   #yy)94@ /77: 011rE   segmentation_mapsr   return_tensorsc                 ~   ||n| j                   }||n| j                  }||n| j                  }t        |d      }||n| j                  }||n| j
                  }|	|	n| j                  }	|
|
n| j                  }
||n| j                  }||n| j                  }||n| j                  }||n| j                  }t        |      }t        |      st        d      t        ||	|
|||||       | j!                  ||||||||	|
||||      \  }}|Xt#        |d      }|D cg c]  }t%        |       }}|D cg c](  }| j'                  ||||t(        j*                  ||      * }}| j-                  ||||||	      }|r(|r&|D cg c]  }t/        j0                  |       c}|d
<   |S c c}w c c}w c c}w )aZ  
        Preprocesses images or a batch of images.

        Args:
            images (`ImageInput`):
                Image or batch of images to preprocess.
            segmentation_maps (`ImageInput`, *optional*):
                The corresponding semantic segmentation maps with the pixel-wise annotations.
            instance_id_to_semantic_id (`list[dict[int, int]]` or `dict[int, int]`, *optional*):
                A mapping between object instance ids and class ids.
            do_split_image (`bool`, *optional*, defaults to `self.do_split_image`):
                Whether to split the input images into overlapping patches for semantic segmentation.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the input images.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Target size as a dictionary with `"shortest_edge"` and `"longest_edge"` keys.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use when resizing.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the input images by `rescale_factor`.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Factor to scale image pixel values.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the input images.
            do_pad (`bool`, *optional*, defaults to `False`):
                Whether to pad the image. If `True`, will pad the patch dimension of the images in the batch to the largest
                number of patches in the batch. Padding will be applied to the bottom and right with zeros.
            image_mean (`float` or `list[float]`, *optional*, defaults to `self.image_mean`):
                Mean for normalization. Single value or list for each channel.
            image_std (`float` or `list[float]`, *optional*, defaults to `self.image_std`):
                Standard deviation for normalization. Single value or list for each channel.
            ignore_index (`int`, *optional*):
                Label to be assigned to background pixels in segmentation maps. If provided, segmentation map pixels
                denoted with 0 (background) will be replaced with `ignore_index`.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be `"pt"`, `"tf"`, `"np"`, or `"jax"`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                Channel format of the output image. Either `"channels_first"` or `"channels_last"`.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                Channel format of the input image.
        Fr   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r   r   r   r   r   r   r;   r   )r   r   r;   r   r   r   r   r   r   r   r   r   r   rb   )expected_ndims)r   r   r;   r   r   r   )r   r   )r   r   r;   r   r   r   r   r   r   r   r   r   r   r   rG   r   r   r   r   r   r   NEARESTencode_inputsrj   tensor)r   r   r   r   r   r   r;   r   r   r   r   r   r   r   r   r   r   r   pixel_values_listr   maskr   encoded_inputsr   s                           r1   
preprocesszEomtImageProcessor.preprocess  s6   ~ ,:+E4K^K^!*!6IDNN	'tTYYTU;'38#-#9Zt
+9+E4K^K^'3'?|TEVEV!-4;;#-#9Zt
!*!6IDNN	'3'?|TEVEV)&1F#: 
 	&!)%!		
 ,0+B+B)!)%!#/ ,C ,
(=  ( 34EVW XBS T$!5 T T ):! % %%$'!/77 +&7 & ! ! ++&) , 
 mTa.bu||G/D.bN?+7 !U!. /cs   &F0>-F5F:r   c                    || j                   n|}|D cg c]  }t        |       }}|t        |d         }t        d|i|      }|g }	g }
t	        |      D ]  \  }}t        |      }t        |t              r||   }n|}t        |||      \  }}|	j                  t        j                  |             |
j                  t        j                  |              |	|d<   |
|d<   |S c c}w )a
  
        Pad images up to the largest image in a batch and create a corresponding `pixel_mask`.

        EoMT addresses semantic segmentation with a mask classification paradigm, thus input segmentation maps
        will be converted to lists of binary masks and their respective labels. Let's see an example, assuming
        `segmentation_maps = [[2,6,7,9]]`, the output will contain `mask_labels =
        [[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]]` (four binary masks) and `class_labels = [2,6,7,9]`, the labels for
        each mask.

        Args:
            pixel_values_list (`list[ImageInput]`):
                list of images (pixel values) to be padded. Each image should be a tensor of shape `(channels, height,
                width)`.

            segmentation_maps (`ImageInput`, *optional*):
                The corresponding semantic segmentation maps with the pixel-wise annotations.

             (`bool`, *optional*, defaults to `True`):
                Whether or not to pad images up to the largest image in a batch and create a pixel mask.

                If left to the default, will return a pixel mask that is:

                - 1 for pixels that are real (i.e. **not masked**),
                - 0 for pixels that are padding (i.e. **masked**).

            instance_id_to_semantic_id (`list[dict[int, int]]` or `dict[int, int]`, *optional*):
                A mapping between object instance ids and class ids. If passed, `segmentation_maps` is treated as an
                instance segmentation map where each pixel represents an instance id. Can be provided as a single
                dictionary with a global/dataset-level mapping or as a list of dictionaries (one per image), to map
                instance ids in each image separately.

            return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
                If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor`
                objects.

            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not provided, it will be inferred.

        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:

            - **pixel_values** -- Pixel values to be fed to a model.
            - **mask_labels** -- Optional list of mask labels of shape `(labels, height, width)` to be fed to a model
              (when `annotations` are provided).
            - **class_labels** -- Optional list of class labels of shape `(labels)` to be fed to a model (when
              `annotations` are provided). They identify the labels of `mask_labels`, e.g. the label of
              `mask_labels[i][j]` if `class_labels[i][j]`.
        r   r   )tensor_type)r   rR   class_labels)r   r   r   r   r   
isinstancelistr2   ro   rj   r   )r   r   r   r   r   r   r   r   r   rR   r   r   r   instance_idrI   classess                   r1   r   z EomtImageProcessor.encode_inputs  s   r -9,@t((lN_`l^L9``$ >?PQR?S T%~7H&IWef(KL)23D)E ?%%#12B#C 8$?"<S"AK"<K!I$!-"w ""5#3#3E#:;##E$4$4W$=>?" -8N=)-9N>*? as   C)segmentation_logitsr   target_sizesc                 ,   |j                   d   }g }g }|D ]~  }t        ||d   |d         \  }	}
|j                  t        j                  ||	|
f|j
                               |j                  t        j                  ||	|
f|j
                                t        |      D ]  \  }\  }}}||   d   ||   d   kD  r6||   dd||ddfxx   ||   z  cc<   ||   dd||ddfxx   dz  cc<   Q||   dddd||fxx   ||   z  cc<   ||   dddd||fxx   dz  cc<    g }t        t        ||            D ]P  \  }\  }}||j                  d      z  }t        j                  |d   ||   d	d
      d   }|j                  |       R |S )a  
        Reconstructs full-size semantic segmentation logits from patch predictions.

        Args:
            segmentation_logits (`torch.Tensor`):
                A tensor of shape `(num_patches, num_classes, patch_height, patch_width)` representing predicted logits
                for each image patch.
            patch_offsets (`list[tuple[int, int, int]]`):
                A list of tuples where each tuple contains:
                - `image_index` (int): Index of the original image this patch belongs to.
                - `start` (int): Start pixel index of the patch along the long dimension (height or width).
                - `end` (int): End pixel index of the patch along the long dimension.
            target_sizes (`list[tuple[int, int]]`):
                list of original (height, width) dimensions for each image before preprocessing.
            size (`dict[str, int]`):
                A size dict which was used to resize.
        r    r|   r}   rd   r   N)r6   r   bilinearFr;   r   align_corners)r'   rD   ro   rj   r&   rd   r   zipclampFinterpolate)r   r   r   r   r;   num_classesaggregated_logitspatch_countsr:   r=   r>   	patch_idx	image_idxpatch_start	patch_endreconstructed_logitsr   	logit_sumcountaveraged_logitsresized_logitss                        r1   merge_image_patchesz&EomtImageProcessor.merge_image_patches  s   0 *//2& 	nJ6z4CXZ^_mZnoMFE$$U[[+vu1MViVpVp%qr[&%,HQdQkQk lm	n ?H>V 	J:I:	;	I&q)L,CA,FF!),QI0Eq-HIM`ajMkkIY';y+@!(CDID!),Q;y3H-HIM`ajMkkIY'1k).C(CDID	J  "'05F1U'V 		8#C#)U'%++!+*<<O]]	*!#&#	
 N !''7		8 $#rE   c                     g }t        |      D ]\  \  }}t        ||d   |d         \  }}||   ddd|d|f   }	t        j                  |	d   |dd      d   }
|j	                  |
       ^ |S )	zJRestores panoptic segmentation logits to their original image resolutions.r|   r}   Nr   r   Fr   r   )r   rD   r   r   ro   )r   r   r   r;   r   r   original_sizer   r   cropped_logitsupsampled_logitss              r1   unpad_imagezEomtImageProcessor.unpad_image  s     "+L"9 	4C*DtO4d>6J+'M< 15a-,6VWN }}y)J^c   !!"23	4 rE   c                    ||n| j                   }|j                  }|j                  }|j                  }t	        |      }t        j                  ||d      }|j                  d      dddf   }|j                         }	t        j                  d||	      }
| j                  |
|||      }|D cg c]  }|j                  d       }}|S c c}w )	zIPost-processes model outputs into final semantic segmentation prediction.Nr   r;   r   dim.zbqc, bqhw -> bchwr   )r;   masks_queries_logitsclass_queries_logitsr   r   r   r   softmaxrl   rj   einsumr   rm   )r   outputsr   r;   r   r   r   r   masks_classesmasks_probsr   output_logitslogitpredss                 r1   "post_process_semantic_segmentationz5EomtImageProcessor.post_process_semantic_segmentation1  s     'tTYY&;;&;;--%d+ }}  
 -444<S#2#XF*224#ll+>{[001DmUacgh2?@!$@@ As   0C	thresholdrU   rV   rr   c                    ||n| j                   }|j                  }|j                  }	|	j                  d   }
|	j                  d   dz
  }t	        |      }t        j                  ||d      }| j                  |||      }|	j                  d      j                  d      \  }}g }t        |
      D ]  }t        ||   ||   ||   ||      \  }}}|j                  d   dk  rH|||   n|j                  dd \  }}t        j                  ||f      dz
  }|j                  |g d       yt        |||||||||   nd	      \  }}|j                  ||d        |S )
zIPost-processes model outputs into final panoptic segmentation prediction.Nr   r   r    r   r   r   rs   segments_info)rS   rp   rq   rr   rU   rV   r`   )r;   r   r   r'   r   r   r   r   r  r7   rn   rN   rj   r&   ro   ry   )r   r  r   r
  rU   rV   rr   r;   r   r   
batch_sizerL   r   mask_probs_batchpred_scores_batchpred_labels_batchresultsr,   rS   rp   rq   r=   r>   rs   rt   s                            r1   "post_process_panoptic_segmentationz5EomtImageProcessor.post_process_panoptic_segmentationQ  s    'tTYY&;;&;;)//2
)//3a7
%d+ }}  
  ++,@,PTU/C/K/KPR/K/S/W/WXZ/[,,z" 	VA3L #%6q%9;LQ;OQZ\f40J[
 "a'3?3KQQ[QaQabcbdQe${{FE?;a?rRS%5%''+-,G/;/GLOT&"L( NNL8TU-	V. rE   c           
      @   ||n| j                   }|j                  }|j                  }t        |      }t	        j
                  ||d      }| j                  |||      }|j                  }	|j                  d   }
|j                  d   }g }t        |
      D ]  }||   }||   }|j                  d      dddf   j                  d      \  }}|dkD  j                         }|j                         j                  d	      |j                  d	      z  j                  d	      |j                  d	      j                  d	      d
z   z  }||z  }t!        j"                  ||   |	      d	z
  }g g }}d}t        |      D ]  }||   j%                         }t!        j&                  ||   dk(        r2||k\  s8||||   d	k(  <   |j)                  |||   j%                         t+        |d      d       |d	z  }|j)                  ||           |j)                  ||d        |S )zDPost-processes model outputs into Instance Segmentation Predictions.Nr   r   r   r   r   .r    gư>r   re   rf   r  )r;   r   r   r   r   r   r   rd   r'   rn   r  r7   r5   rl   flattenrP   rj   r&   rQ   allro   r9   )r   r  r   r
  r;   r   r   r   r  rd   r  num_queriesr  r,   	mask_pred
mask_classrJ   pred_classes
pred_masksmask_scoresrp   rs   instance_mapsrt   ru   jri   s                              r1   "post_process_instance_segmentationz5EomtImageProcessor.post_process_instance_segmentation  sP    'tTYY&;;&;;%d+ }}  
  ++,@,PTU%,,)//2
*004z" !	VA(+I-a0J $.#5#5"#5#=c3B3h#G#K#KB#O FL#a-..0J %,,.66q9J<N<Nq<QQVVWXY""1%))!,t3K !;.K ;;|AvFJL&("8M!";' 8#A++-yyA!!34)9K7ILA!!34OO"4(4Q(<(<(>%*5!_ '!+&!((A78 NNL8TUC!	VD rE   )NNNNNNNNNNNN)FFNNNN)NNNNNN)皙?      ?r"  NN)r#  N)(__name__
__module____qualname____doc__model_input_namesr   BILINEARboolr   dictstrr8   r5   r   r   r   r"   ndarrayr   r   r   tupler   r   r   r   r   r   r   r   r   r   rj   Tensorr   r   r	  r  r   __classcell__)r   s   @r1   r   r      s   +Z (( )-'9'B'B '!$:>9=&*$( % % tCH~& % %	 %
  %  %  %  %  % U5$u+#567 % E%e"456 % sm % SM %L (:'B'BDH$zz$ $ %	$ $E#/?*?$@A$ 
$L&* &D &s &uUY[_U_O` &6* D RZZ " %))-'+)-!%%)*.'+:>9=>BDH:%:% D>:% tCH~&	:%
 %:% !:% :% TN:% !:% tn:% U5$u+#567:% E%e"456:% eC)9$9:;:% $E#/?*?$@A:% 
:%~ %*!&)-'+48DH#2$#2 D>#2 	#2
 tCH~&#2 %#2 3 001#2 $E#/?*?$@A#2 
#2J %& TX?C)-$()-'+%)*.'+!%:>9=&*;?4D4J4JDH%KK $E$tCH~*>S#X*N$OPK %-T#s(^$<	K
 !K D>K tCH~&K %K TNK !K tnK K U5$u+#567K E%e"456K smK  !sJ!78!K" 3 001#K$ $E#/?*?$@A%K& 
'K 'K` )-\`&*;?DHZ
+Z &Z %-U4S#X3GcSVh3W-X$Y	Z
 smZ !sJ!78Z $E#/?*?$@AZx7$"\\7$ E#sC-017$ 5c?+	7$
 38n7$ 
ell	7$r"\\ 5c?+ 38n	
 
ell	2 *.	 5c?+ tCH~&	
 
H  #-0-1)-7 5c?+7 	7
 7 &+7  S	*7 tCH~&7r %&
 )-? 5c?+? 	?
 tCH~&? '?rE   r   )NNr!  )r#  r"  )r#  r"  N)6r'  r   typingr   r   numpyr"   image_processing_utilsr   r   r   image_transformsr	   r
   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   r   r   
get_loggerr$  loggerrj   torch.nn.functionalnn
functionalr   r+  r8   r2   r.  rD   rN   r_   r5   ry   r,  r   r   __all__r   rE   r1   <module>r=     s5   &  "  U U 
    
		H	%## <@"&"D""D (c3h 8"D 3-"DJ$5c? $P<8#6  ),-13"
 3" "'3" %S/*3"l'tCH~ '%S/ 'S+ Sl  
 rE   