
    rh`                        d dl mZ d dlmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZ  ej@                  e!      Z"d	ejF                  d
e$de%ejF                     fdZ& G d de      Z'dgZ(y)    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_patch_output_sizeselect_best_resolution)PaddingModeconvert_to_rgbpadresizeto_channel_dimension_format)
ChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypeloggingimage
patch_sizereturnc                    g }t        | |      \  }}t        d||      D ]^  }t        d||      D ]L  }|t        j                  k(  r| |||z   |||z   f   }n| dd|||z   |||z   f   }|j	                  |       N ` |S )a  
    Divides an image into patches of a specified size.

    Args:
        image (`np.array`):
            The input image.
        patch_size (`int`):
            The size of each patch.
        input_data_format (`ChannelDimension` or `str`):
            The channel dimension format of the input image.

    Returns:
        list: A list of np.array representing the patches.
    channel_dimr   N)r   ranger   LASTappend)	r   r   input_data_formatpatchesheightwidthijpatchs	            /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/aria/image_processing_aria.pydivide_to_patchesr-   .   s     G"56GHMFE1fj) "q%, 	"A $4$9$99a!j.0!a*n2DDEaQ^!3QZ5GGHNN5!	"" N    c                        e Zd ZdZg dZddddddddddej                  fd	eee	      d
eee	      de
de
deeee
e
f         dee   dee   dedee
e	f   dee   def fdZdddddddddddej                   dfdeeee   f   d	eee	ee	   f      d
eee	ee	   f      dee
   dee
   dee   dee   dee   dee	   dee   dedeeeef      dee   deeeef      fdZdej,                  dededej,                  fdZdedefd Zdej,                  dededej,                  fd!Zej6                  d"ddfdej8                  d#ee
ee
e
f   eee
e
f      f   d$ed%ee	ee	   f   deeeef      deeeef      dej8                  fd&Zdej,                  d'eee
e
f      d(e
dedededeej,                     fd)Zd-d*e
d+e
fd,Z  xZ!S ).AriaImageProcessoraG  
    A vision processor for the Aria model that handles image preprocessing.
    Initialize the AriaImageProcessor.

    Args:
        image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
            Mean values for normalization.
        image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
            Standard deviation values for normalization.
        max_image_size (`int`, *optional*, defaults to 980):
            Maximum image size.
        min_image_size (`int`, *optional*, defaults to 336):
            Minimum image size.
        split_resolutions (`list`, *optional*, defaults to a list of optimal,resolutions as tuples):
            The optimal resolutions for splitting the image.
        split_image (`bool`, *optional*, defaults to `False`):
            Whether to split the image.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image.
        resample (PILImageResampling, *optional*, defaults to `BICUBIC`):
            The resampling filter to use if resizing the image.
    pixel_values
pixel_mask	num_cropsN  iP  FTgp?
image_mean	image_stdmax_image_sizemin_image_sizesplit_resolutionssplit_imagedo_convert_rgb
do_rescalerescale_factordo_normalizeresamplec                 &   t        |   di | |g d}|g d}|| _        || _        || _        || _        || _        |!g d}|D cg c]  }|d   dz  |d   dz  f }}|| _        || _        || _	        |	| _
        |
| _        || _        y c c}w )N)      ?rB   rB   ))      )rC   r   )rC      )rC      )rC      )rC      )rC      )rD   rE   )rD   r   )rD   rD   )rD   rC   )r   rC   )r   rD   )rE   rC   )rE   rD   )rF   rC   )rG   rC   )rH   rC   )rI   rC   r     rC    )super__init__r8   r9   r6   r7   r;   r:   r<   r=   r>   r?   r@   )selfr6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   kwargsel	__class__s                 r,   rM   zAriaImageProcessor.__init__l   s     	"6"(J'I,,$"&$ !yFW X"Q%#+r!us{!; X X!2,$,(  !Ys   
Bptimagesreturn_tensorsdata_formatr%   c           	      .   ||n| j                   }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j
                  }||n| j                  }|	|	n| j                  }	|
|
n| j                  }
||n| j                  }|dvrt        d      t        |      }t        |      st        d      t        |
|||||	       |r|D cg c]  }t        |       }}|D cg c]  }t        |       }}|r#t!        |d         rt"        j%                  d       |t'        |d         }g }g }d}|D ]}  }|r"| j)                  || j*                  ||||      }n|g}|t-        |      |kD  rt-        |      }|D ]2  }t/        |      \  }}|t1        ||      z  }||k\  rt1        t3        ||z        |      |f}n|t1        t3        ||z        |      f}t5        |||||	      }||d   z
  ||d
   z
  }}t7        |d|fd|ff||      }t9        j:                  ||ft<              }d
|d|d   d|d
   f<   |j?                  |       |r| jA                  ||	|      }|
r;| jC                  || j                   | j                  ||      }|tE        |||      n|}|j?                  |       5  tG        t9        jH                  |d      t9        jH                  |d      |d|      S c c}w c c}w )aI  
        Process a list of images.

        Args:
            images (ImageInput or list of ImageInput):
                The input image or a list of images.
            image_mean (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
                Mean values for normalization.
            image_std (`list`, *optional*, defaults to [0.5, 0.5, 0.5]):
                Standard deviation values for normalization.
            max_image_size (`int`, *optional*, defaults to `self.max_image_size` (980)):
                Maximum image size.
            min_image_size (`int`, *optional*, defaults to `self.min_image_size` (336)):
                Minimum image size.
            split_image (`bool`, *optional*, defaults to `self.split_image` (False)):
                Whether to split the image.
            do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb` (True)):
                Whether to convert the image to RGB.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image.
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize` (True)):
                Whether to normalize the image.
            resample (PILImageResampling, *optional*, defaults to `self.resample` (BICUBIC)):
                The resampling filter to use if resizing the image.
            return_tensors (`str` or `TensorType`, *optional*, defaults to "pt"):
                The type of tensor to return.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`:
                        image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`:
                        image in (height, width, num_channels) format.
                If unset, will use same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`:
                        image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`:
                        image in (height, width, num_channels) format.
                If unset, will use the inferred format of the input image.

        Returns:
            BatchFeature:
                A BatchFeature object containing:
                - 'pixel_values':
                    Tensor of processed image pixel values.
                - 'pixel_mask':
                    Boolean pixel mask. This mask is a 2D tensor of shape (max_image_size, max_image_size) where:
                    - True (1) values indicate pixels that belong to the original resized image.
                    - False (0) values indicate pixels that are part of the padding.
                  The mask helps distinguish between actual image content and padded areas in subsequent processing steps.
                - 'num_crops':
                    The maximum number of crops across all images.
        N)rJ   r5   z(max_image_size must be either 490 or 980zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r?   r6   r7   r@   r=   r>   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)rU   r%   )r@   rU   r%   rC   )dtype)r   scaler%   )axisr1   )datatensor_type)%r6   r7   r8   r9   r;   r<   r=   r>   r?   r@   
ValueErrorr   r   r   r   r   r   loggerwarning_oncer   get_image_patchesr:   lenr   maxintr   r   npzerosboolr$   rescale	normalizer   r   stack)rN   rS   r6   r7   r8   r9   r;   r<   r=   r>   r?   r@   rT   rU   r%   r   r2   pixel_masksr4   crop_images
crop_imagehwrX   new_sizecrop_image_resizedpadding_bottompadding_rightcrop_image_paddedr3   s                                 r,   
preprocesszAriaImageProcessor.preprocess   s   R $.#9Zt
!*!6IDNN	+9+E4K^K^+9+E4K^K^%0%<k$BRBR+9+E4K^K^#-#9Zt
+9+E4K^K^'3'?|TEVEV'38+GHH)&1F#: 
 	&%!!)	
 9?@nU+@F@ 6<<E.'<</&)4s
 $ >vay I	 @	7E"44**" 1&7 5   %g C$4y$@,	) 17
%j11&Q26 #CE	NN C^TH .CE	NN0STH%+% 1&7&" 1?!0Ln_ghi_jNj$'&(1m*<= 1&7	%!  XX~~&FdS
;<
=Xa[=-HQK-78"":.(,/~Yj )5 )%  (,)$5*; )7 )% '2 44E{Tef. & ##$56c17@	7B  "A > hh{;&
 '
 	
i A =s   (L Lr   target_resolutionr   c                 H    t        |||      \  }}t        |||f||      }|S )aC  
        Resizes an image to a target resolution while maintaining aspect ratio.

        Args:
            image (np.array):
                The input image.
            target_resolution (tuple):
                The target resolution (height, width) of the image.
            resample (`PILImageResampling`):
                Resampling filter to use if resizing the image.
            input_data_format (`ChannelDimension` or `str`):
                The channel dimension format of the input image.

        Returns:
            np.array: The resized and padded image.
        r@   r%   )r	   r   )rN   r   rt   r@   r%   
new_height	new_widthresized_images           r,   _resize_for_patchingz'AriaImageProcessor._resize_for_patchingV  s7    & !6e=NPa b
I uz9&=duvr.   original_resolutionc                 z    |\  }}|\  }}t        ||z
  d      \  }}t        ||z
  d      \  }	}
|	|	|
z   f|||z   ffS )NrD   )divmod)rN   r{   rt   original_heightoriginal_widthtarget_heighttarget_widthpaste_xr_xpaste_yr_ys              r,   _get_padding_sizez$AriaImageProcessor._get_padding_sizep  s]    *='&7#|l^;Q?mo=qA3''7S=)AAAr.   c                 j    t        |||      }| j                  ||      }| j                  ||      }|S )zU
        Pad an image to a target resolution while maintaining aspect ratio.
        )padding)r	   r   r   )rN   r   rt   r%   new_resolutionr   padded_images          r,   _pad_for_patchingz$AriaImageProcessor._pad_for_patchingw  s?     /u6GIZ[((9JKxxwx7r.   g        r   modeconstant_valuesc                 ^   t        |t              st        |      dk7  rt        ||||||      S |t	        |      }t
        j                  dt
        j                  dt
        j                  dt
        j                  di}t        j                  ||||   |      }|t        |||      }|S |}|S )a	  
        Pads the `image` with the specified `padding` and `mode`. Padding can be in the (`height`, `width`)
        dimension of in the (`num_patches`) dimension. In the second case an iterable if tuples is expected
        as input.

        Args:
            image (`np.ndarray`):
                The image to pad.
            padding (`int` or `tuple[int, int]` or `Iterable[tuple[int, int]]`):
                Padding to apply to the edges of the height, width axes. Can be one of three formats:
                - `((before_height, after_height), (before_width, after_width))` unique pad widths for each axis.
                - `((before, after),)` yields same before and after pad for height and width.
                - `(pad,)` or int is a shortcut for before = after = pad width for all axes.
            mode (`PaddingMode`):
                The padding mode to use. Can be one of:
                    - `"constant"`: pads with a constant value.
                    - `"reflect"`: pads with the reflection of the vector mirrored on the first and last values of the
                    vector along each axis.
                    - `"replicate"`: pads with the replication of the last value on the edge of the array along each axis.
                    - `"symmetric"`: pads with the reflection of the vector mirrored along the edge of the array.
            constant_values (`float` or `Iterable[float]`, *optional*):
                The value to use for the padding if `mode` is `"constant"`.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                If unset, will use same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                If unset, will use the inferred format of the input image.

        Returns:
            `np.ndarray`: The padded image.

        rE   constantreflectedge	symmetric)r   r   )
isinstancerb   r`   r   r   r   CONSTANTREFLECT	REPLICATE	SYMMETRICrc   r   )rN   r   r   r   r   rU   r%   padding_mode_mappings           r,   r   zAriaImageProcessor.pad  s    ` gs#s7|q'8ugt_kK\]]$ >u E   *!!6!!;	 
 ug,@,FXghR]Ri'{<MN 	  pu 	 r.   grid_pinpointsr   c                     t        |t              st        d      |}t        ||      }t	        ||      }	| j                  ||	||      }
| j                  |
|	|      }t        |||      }|D cg c]  }t        |||       }}|S c c}w )aY  
        Process an image with variable resolutions by dividing it into patches.

        Args:
            image (`np.array`):
                The input image to be processed.
            grid_pinpoints (list[tuple[int, int]]):
                A list of possible resolutions as tuples.
            patch_size (`int`):
                Size of the patches to divide the image into.
            resample (`PILImageResampling`):
                Resampling filter to use if resizing the image.
            data_format (`ChannelDimension` or `str`):
                The channel dimension format for the output image.
            input_data_format (`ChannelDimension` or `str`):
                The channel dimension format of the input image.

        Returns:
            `list[np.array]`: A list of NumPy arrays containing the processed image patches.
        z6grid_pinpoints must be a list of possible resolutions.r    rv   )r%   )r   r%   )r!   input_channel_dim)	r   list	TypeErrorr   r
   rz   r   r-   r   )rN   r   r   r   r@   rU   r%   possible_resolutions
image_sizebest_resolutionry   r   r&   r+   s                 r,   r_   z$AriaImageProcessor.get_image_patches  s    : .$/TUU-#E7HI
0=QR11?XIZ 2 
 --m_`q-r#LZ[lm
 !
 (;Zkl
 
 	
s   2Br'   r(   c                     |j                  d| j                        }|j                  d| j                        }t        ||f| j                        \  }}|sd}|S ||z  |z  |z  }|S )a  
        A utility that returns number of image patches for a given image size.

        Args:
            height (`int`):
                Height of the input image.
            width (`int`):
                Width of the input image.
            images_kwargs (`dict`, *optional*)
                Any kwargs to override defaults of the image processor.
        Returns:
            `int`: Number of patches per image.
        r;   r8   rC   )getr;   r8   r
   r:   )	rN   r'   r(   images_kwargsr;   r8   resized_heightresized_widthnum_patchess	            r,   get_number_of_image_patchesz.AriaImageProcessor.get_number_of_image_patches  s|     $''t7G7GH&**+;T=P=PQ(>PTPfPf(g%*a 1?.0PS`0`dr0rr.   )N)"__name__
__module____qualname____doc__model_input_namesr   BICUBICr   r   floatrb   tuplere   r   rM   r   FIRSTr   strr   rs   rc   arrayrz   r   r   r   r   ndarrayr   r   r_   r   __classcell__)rQ   s   @r,   r0   r0   J   s   > D -1+/!!=A&+)-,3'+'9'A'A"!T%[)"! DK("! 	"!
 "! $DsCx$9:"! d^"! !"! "! c5j)"! tn"! %"!N ;?9=(,(,&*)-%)*.'+'+;?2B2H2HDHD
j$z"223D
 U5$u+#567D
 E%e"456	D

 !D
 !D
 d^D
 !D
 TND
 !D
 tnD
 %D
 !sJ!78D
 ./D
 $E#/?*?$@AD
LXX27Vf	4BU Bu BXX27L\	" (009<>BDH@zz@ sE#s(OXeCHo-FFG@ 	@
 uhuo56@ eC)9$9:;@ $E#/?*?$@A@ 
@D0xx0 U38_-0 	0
 %0 &0 ,0 
bhh0d# c r.   r0   ))collections.abcr   typingr   r   numpyrc   image_processing_utilsr   r   r	   r
   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   utilsr   r   
get_loggerr   r]   r   rb   r   r-   r0   __all__rK   r.   r,   <module>r      s   * % "  u u e e   ) 
		H	%RXX 3 dSUS[S[n 8A+ AH  
 r.   