
    rh8                        d Z ddlmZmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZ ddlmZ  G d dej$                        Z G d dej$                        Z G d dej$                        Z G d dej$                        Z G d dej$                        Ze G d de             Z ed       G d de             ZddgZy)zrPyTorch UperNet model. Based on OpenMMLab's implementation, found in https://github.com/open-mmlab/mmsegmentation.    )OptionalUnionN)nn)CrossEntropyLoss   )SemanticSegmenterOutput)PreTrainedModel)auto_docstring)load_backbone   )UperNetConfigc                        e Zd ZdZ	 	 	 ddededeeeeef   f   deeeeef   ef   dedeeeeef   f   dd	f fd
Z	de
j                  de
j                  fdZ xZS )UperNetConvModulez
    A convolutional block that bundles conv/norm/activation layers. This block simplifies the usage of convolution
    layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU).
    in_channelsout_channelskernel_sizepaddingbiasdilationreturnNc                     t         |           t        j                  ||||||      | _        t        j
                  |      | _        t        j                         | _        y )N)r   r   r   r   r   r   )	super__init__r   Conv2dconvBatchNorm2d
batch_normReLU
activation)selfr   r   r   r   r   r   	__class__s          /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/upernet/modeling_upernet.pyr   zUperNetConvModule.__init__$   sQ     	II#%#
	 ..6'')    inputc                 l    | j                  |      }| j                  |      }| j                  |      }|S N)r   r   r   )r    r$   outputs      r"   forwardzUperNetConvModule.forward9   s1    5!((r#   )r   Fr   )__name__
__module____qualname____doc__intr   tuplestrboolr   torchTensorr(   __classcell__r!   s   @r"   r   r      s     5601$$ $ 3c3h/0	$
 sE#s(OS01$ $ U38_,-$ 
$*U\\ ell r#   r   c                   h     e Zd Zdedededdf fdZdej                  dej                  fdZ xZS )	UperNetPyramidPoolingBlock
pool_scaler   channelsr   Nc                     t         |           t        j                  |      t	        ||d      g| _        t        | j
                        D ]   \  }}| j                  t        |      |       " y )Nr   r   )	r   r   r   AdaptiveAvgPool2dr   layers	enumerate
add_moduler/   )r    r7   r   r8   ilayerr!   s         r"   r   z#UperNetPyramidPoolingBlock.__init__B   sa      ,k8C
 "$++. 	+HAuOOCFE*	+r#   r$   c                 <    |}| j                   D ]
  } ||      } |S r&   )r<   )r    r$   hidden_stater@   s       r"   r(   z"UperNetPyramidPoolingBlock.forwardK   s*    [[ 	/E .L	/r#   )	r)   r*   r+   r-   r   r1   r2   r(   r3   r4   s   @r"   r6   r6   A   s?    +3 +S +C +D +U\\ ell r#   r6   c            
            e Zd ZdZdeedf   dedededdf
 fd	Zd
ej                  de
ej                     fdZ xZS )UperNetPyramidPoolingModulea}  
    Pyramid Pooling Module (PPM) used in PSPNet.

    Args:
        pool_scales (`tuple[int]`):
            Pooling scales used in Pooling Pyramid Module.
        in_channels (`int`):
            Input channels.
        channels (`int`):
            Channels after modules, before conv_seg.
        align_corners (`bool`):
            align_corners argument of F.interpolate.
    pool_scales.r   r8   align_cornersr   Nc                    t         |           || _        || _        || _        || _        g | _        t        |      D ]I  \  }}t        |||      }| j                  j                  |       | j                  t        |      |       K y )N)r7   r   r8   )r   r   rE   rF   r   r8   blocksr=   r6   appendr>   r/   )	r    rE   r   r8   rF   r?   r7   blockr!   s	           r"   r   z$UperNetPyramidPoolingModule.__init__a   s    &*& &{3 	+MAz.*R]hpqEKKu%OOCFE*	+r#   xc                     g }| j                   D ]Y  } ||      }t        j                  j                  ||j	                         dd  d| j
                        }|j                  |       [ |S )N   bilinearsizemoderF   )rH   r   
functionalinterpolaterP   rF   rI   )r    rK   ppm_outsppmppm_outupsampled_ppm_outs         r"   r(   z#UperNetPyramidPoolingModule.forwardm   sn    ;; 	/C!fG " 9 9affhqrl4K]K] !: ! OO-.	/ r#   )r)   r*   r+   r,   r.   r-   r0   r   r1   r2   listr(   r3   r4   s   @r"   rD   rD   R   s[    
+E#s(O 
+# 
+QT 
+ei 
+nr 
+ $u||*< r#   rD   c                   `     e Zd ZdZ fdZd Zdej                  dej                  fdZ xZ	S )UperNetHeadz
    Unified Perceptual Parsing for Scene Understanding. This head is the implementation of
    [UPerNet](https://huggingface.co/papers/1807.10221).
    c                    t         |           || _        |j                  | _        || _        |j
                  | _        d| _        t        j                  | j                  |j                  d      | _        t        | j                  | j                  d   | j                  | j                        | _        t        | j                  d   t        | j                        | j                  z  z   | j                  dd      | _        t        j"                         | _        t        j"                         | _        | j                  d d D ]s  }t        || j                  d      }t        | j                  | j                  dd      }| j$                  j)                  |       | j&                  j)                  |       u t        t        | j                        | j                  z  | j                  dd      | _        y )NFr   r:   )rF   r   r   r   )r   r   configrE   r   hidden_sizer8   rF   r   r   
num_labels
classifierrD   psp_modulesr   len
bottleneck
ModuleListlateral_convs	fpn_convsrI   fpn_bottleneck)r    r^   r   l_convfpn_convr!   s        r"   r   zUperNetHead.__init__~   s   !--&**"))DMM63D3DRST 7R MM,,	
 ,R 3t'7'7#84==#HHMM	
  ]]_++CR0 	,K&{DMMqQF(ST^_`H%%f-NN!!(+		, 0  !DMM1MM	
r#   c                     |d   }|g}|j                  | j                  |             t        j                  |d      }| j	                  |      }|S )Nr\   r   dim)extendrb   r1   catrd   )r    inputsrK   psp_outsr'   s        r"   psp_forwardzUperNetHead.psp_forward   sL    2J3((+,99X1-*r#   encoder_hidden_statesr   c                 P   t        | j                        D cg c]  \  }} |||          }}}|j                  | j                  |             t	        |      }t        |dz
  dd      D ]V  }||dz
     j                  dd  }||dz
     t        j                  j                  ||   |d| j                        z   ||dz
  <   X t        |dz
        D cg c]  } | j                  |   ||          }}|j                  |d          t        |dz
  dd      D ]E  }t        j                  j                  ||   |d   j                  dd  d| j                        ||<   G t        j                  |d      }| j                  |      }| j                  |      }|S c c}}w c c}w )Nr   r   r\   rM   rN   rO   rl   )r=   rf   rI   rr   rc   rangeshaper   rR   rS   rF   rg   r1   ro   rh   ra   )	r    rs   r?   lateral_convlateralsused_backbone_levels
prev_shapefpn_outsr'   s	            r"   r(   zUperNetHead.forward   s   R[\`\n\nRopq,L!6q!9:pp(()>?@  #8}+a/B7 	A!!a%..qr2J&q1uo0I0I*:TM_M_ 1J 1 HQUO	 =BBVYZBZ<[\q%DNN1%hqk2\\%+a/B7 	A--33(1+"3"3AB"7jX\XjXj 4 HQK	 99X1-$$X.(3 q ]s   FF#)
r)   r*   r+   r,   r   rr   r1   r2   r(   r3   r4   s   @r"   rZ   rZ   x   s.    
%
NU\\ ell r#   rZ   c                        e Zd ZdZ	 d
dededeeeeef   f   ddf fdZdej                  dej                  fd	Z
 xZS )UperNetFCNHeada  
    Fully Convolution Networks for Semantic Segmentation. This head is the implementation of
    [FCNNet](https://huggingface.co/papers/1411.4038>).

    Args:
        config:
            Configuration.
        in_channels (int):
            Number of input channels.
        kernel_size (int):
            The kernel size for convs in the head. Default: 3.
        dilation (int):
            The dilation rate for convs in the head. Default: 1.
    in_indexr   r   r   Nc           
      l   t         	|           || _        |j                  ||   n|j                  | _        |j
                  | _        |j                  | _        |j                  | _
        || _        |dz  |z  }g }|j                  t        | j                  | j                  |||             t        | j                  dz
        D ]5  }|j                  t        | j                  | j                  |||             7 | j                  dk(  rt        j                          | _        nt        j$                  | | _        | j                  r8t        | j                  | j                  z   | j                  ||dz        | _        t        j(                  | j                  |j*                  d      | _        y )NrM   )r   r   r   r   r   r]   r:   )r   r   r^   auxiliary_in_channelsr   auxiliary_channelsr8   auxiliary_num_convs	num_convsauxiliary_concat_inputconcat_inputr~   rI   r   ru   r   Identityconvs
Sequentialconv_catr   r`   ra   )
r    r^   r   r~   r   r   conv_paddingr   r?   r!   s
            r"   r   zUperNetFCNHead.__init__   sv    	%+%A%A%IK!vOkOk 	 1133"99 #q(H4  $--[R^iq	

 t~~)* 	ALL!MM4==kS_jr	 >>QDJ.DJ-  4==0$--[bmqrbrDM ))DMM63D3DRSTr#   rs   c                     || j                      }| j                  |      }| j                  r(| j                  t	        j
                  ||gd            }| j                  |      }|S )Nr   rl   )r~   r   r   r   r1   ro   ra   )r    rs   hidden_statesr'   s       r"   r(   zUperNetFCNHead.forward  sX    -dmm<M*]]599mV-D!#LMF(r#   )rM   r   r   )r)   r*   r+   r,   r-   r   r.   r   r1   r2   r(   r3   r4   s   @r"   r}   r}      sm      uv$U-0$UCF$UV[\_afgjlogoap\pVq$U	$ULU\\ ell r#   r}   c                   &    e Zd ZU eed<   dZg Zd Zy)UperNetPreTrainedModelr^   pixel_valuesc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          y y t        |t        j                        rJ|j                  j                  j                  d       |j                  j                  j                          y y )Ng        )meanstdg      ?)
isinstancer   r   weightdatanormal_r^   initializer_ranger   zero_r   fill_)r    modules     r"   _init_weightsz$UperNetPreTrainedModel._init_weights  s    fbii(MM&&CT[[5R5R&S{{&  &&( '/MM$$S)KK""$ 0r#   N)r)   r*   r+   r   __annotations__main_input_name_no_split_modulesr    r#   r"   r   r     s    $O%r#   r   zW
    UperNet framework leveraging any vision backbone e.g. for ADE20k, CityScapes.
    )custom_introc                        e Zd Z fdZe	 	 	 	 	 d	deej                     dee   dee   deej                     dee   de	e
ef   fd       Z xZS )
UperNetForSemanticSegmentationc                    t         |   |       t        |      | _        t	        || j                  j
                        | _        |j                  r!t        || j                  j
                        nd | _	        | j                          y )N)r   )r   r   r   backbonerZ   r8   decode_headuse_auxiliary_headr}   auxiliary_head	post_init)r    r^   r!   s     r"   r   z'UperNetForSemanticSegmentation.__init__"  sj     %f- 'v4==;Q;QRJPJcJcN6t}}/E/EFim 	
 	r#   r   output_attentionsoutput_hidden_stateslabelsreturn_dictr   c                    |$| j                   j                  dk(  rt        d      ||n| j                   j                  }||n| j                   j                  }||n| j                   j
                  }| j                  j                  |||      }|j                  }| j                  |      }t        j                  j                  ||j                  dd dd      }d}	| j                  A| j                  |      }	t        j                  j                  |	|j                  dd dd      }	d}
|Pt        | j                   j                   	      } |||      }
|	% ||	|      }|
| j                   j"                  |z  z  }
|s|r
|f|dd z   }n	|f|dd z   }|
|
f|z   S |S t%        |
||j&                  |j(                  
      S )a  
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Examples:
        ```python
        >>> from transformers import AutoImageProcessor, UperNetForSemanticSegmentation
        >>> from PIL import Image
        >>> from huggingface_hub import hf_hub_download

        >>> image_processor = AutoImageProcessor.from_pretrained("openmmlab/upernet-convnext-tiny")
        >>> model = UperNetForSemanticSegmentation.from_pretrained("openmmlab/upernet-convnext-tiny")

        >>> filepath = hf_hub_download(
        ...     repo_id="hf-internal-testing/fixtures_ade20k", filename="ADE_val_00000001.jpg", repo_type="dataset"
        ... )
        >>> image = Image.open(filepath).convert("RGB")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> outputs = model(**inputs)

        >>> logits = outputs.logits  # shape (batch_size, num_labels, height, width)
        >>> list(logits.shape)
        [1, 150, 512, 512]
        ```Nr   z/The number of labels should be greater than one)r   r   rM   rN   FrO   )ignore_index)losslogitsr   
attentions)r^   r`   
ValueErroruse_return_dictr   r   r   forward_with_filtered_kwargsfeature_mapsr   r   rR   rS   rv   r   r   loss_ignore_indexauxiliary_loss_weightr   r   r   )r    r   r   r   r   r   outputsfeaturesr   auxiliary_logitsr   loss_fctauxiliary_lossr'   s                 r"   r(   z&UperNetForSemanticSegmentation.forward0  s   H $++"8"8A"=NOO%0%<k$++B]B]$8$D $++JjJj 	 2C1N-TXT_T_TqTq--<</CWh = 
 ''!!(+**68J8J128NU_ot*u*#228<!}}88 |'9'9!"'=J^c  9   'T[[5R5RSHFF+D+!)*:F!C99NJJ# WQR[0 WQR[0)-)9TGf$EvE&!//))	
 	
r#   )NNNNN)r)   r*   r+   r   r
   r   r1   r2   r0   r   r.   r   r(   r3   r4   s   @r"   r   r     s      04,0/3)-&*P
u||,P
 $D>P
 'tn	P

 &P
 d^P
 
u--	.P
 P
r#   r   )r,   typingr   r   r1   r   torch.nnr   modeling_outputsr   modeling_utilsr	   utilsr
   utils.backbone_utilsr   configuration_upernetr   Moduler   r6   rD   rZ   r}   r   r   __all__r   r#   r"   <module>r      s    y "   % 7 - # 1 0 		  F "#")) #LQ")) Qh=RYY =@ %_ % % 
`
%; `

`
F ,-E
Fr#   