
    rhf                     j   d Z ddlZddlmZ ddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ  ej4                  e      Ze ed       G d de                    Z G d de	j<                        Z G d de	j<                        Z  G d de	j<                        Z! G d de	j<                        Z" G d de	j<                        Z# G d de	j<                        Z$ G d de	j<                        Z% G d d e	j<                        Z& G d! d"e	j<                        Z' G d# d$e	j<                        Z( G d% d&e	j<                        Z)e G d' d(e             Z*e G d) d*e*             Z+ ed+       G d, d-e*             Z, ed.       G d/ d0e*             Z-g d1Z.y)2zPyTorch LeViT model.    N)	dataclass)OptionalUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel)auto_docstringlogging   )LevitConfigzD
    Output type of [`LevitForImageClassificationWithTeacher`].
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   y),LevitForImageClassificationWithTeacherOutputan  
    logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores as the average of the `cls_logits` and `distillation_logits`.
    cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
        class token).
    distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
        Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
        distillation token).
    Nlogits
cls_logitsdistillation_logitshidden_states)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   tuple     {/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/levit/modeling_levit.pyr   r   (   sc    	 +/FHU&&'..2J**+27;%"3"34;8<M8E%"3"345<r$   r   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )LevitConvEmbeddingsz[
    LeViT Conv Embeddings with Batch Norm, used in the initial patch embedding layer.
    c	           
          t         	|           t        j                  |||||||d      | _        t        j
                  |      | _        y )NF)dilationgroupsbias)super__init__r   Conv2dconvolutionBatchNorm2d
batch_norm)
selfin_channelsout_channelskernel_sizestridepaddingr)   r*   bn_weight_init	__class__s
            r%   r-   zLevitConvEmbeddings.__init__E   sF     	99{FGh_elq
 ..6r$   c                 J    | j                  |      }| j                  |      }|S N)r/   r1   )r2   
embeddingss     r%   forwardzLevitConvEmbeddings.forwardN   s&    %%j1
__Z0
r$   )r   r   r   r   r   r   r   r-   r=   __classcell__r9   s   @r%   r'   r'   @   s    
 mn7r$   r'   c                   (     e Zd ZdZ fdZd Z xZS )LevitPatchEmbeddingsz
    LeViT patch embeddings, for final embeddings to be passed to transformer blocks. It consists of multiple
    `LevitConvEmbeddings`.
    c                 X   t         |           t        |j                  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   |j
                  |j                  |j                        | _        |j                  | _        y )Nr            )r,   r-   r'   num_channelshidden_sizesr5   r6   r7   embedding_layer_1r   	Hardswishactivation_layer_1embedding_layer_2activation_layer_2embedding_layer_3activation_layer_3embedding_layer_4r2   configr9   s     r%   r-   zLevitPatchEmbeddings.__init__Z   so   !4!4!4Q!71!<f>P>PRXR_R_agaoao"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?ASASU[UbUbdjdrdr"
 #//r$   c                    |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }|j                  d      j                  dd      S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rF   )shaperG   
ValueErrorrI   rK   rL   rM   rN   rO   rP   flatten	transpose)r2   pixel_valuesrG   r<   s       r%   r=   zLevitPatchEmbeddings.forwardp   s    #))!,4,,,w  ++L9
,,Z8
++J7
,,Z8
++J7
,,Z8
++J7
!!!$..q!44r$   r>   r@   s   @r%   rB   rB   T   s    
0,5r$   rB   c                   &     e Zd Zd fd	Zd Z xZS )MLPLayerWithBNc                     t         |           t        j                  ||d      | _        t        j
                  |      | _        y )NF)in_featuresout_featuresr+   )r,   r-   r   LinearlinearBatchNorm1dr1   )r2   	input_dim
output_dimr8   r9   s       r%   r-   zMLPLayerWithBN.__init__   s3    iiIJUZ[..4r$   c                     | j                  |      }| j                  |j                  dd            j                  |      }|S )Nr   r   )r_   r1   rV   
reshape_asr2   hidden_states     r%   r=   zMLPLayerWithBN.forward   s<    {{<0|';';Aq'ABMMl[r$   )r   r   r   r   r-   r=   r?   r@   s   @r%   rZ   rZ      s    5
r$   rZ   c                   $     e Zd Z fdZd Z xZS )LevitSubsamplec                 >    t         |           || _        || _        y r;   )r,   r-   r6   
resolution)r2   r6   rk   r9   s      r%   r-   zLevitSubsample.__init__   s    $r$   c                     |j                   \  }}}|j                  || j                  | j                  |      d d d d | j                  d d | j                  f   j	                  |d|      }|S )N)rT   viewrk   r6   reshape)r2   rf   
batch_size_channelss        r%   r=   zLevitSubsample.forward   sk    "."4"4
Ax#((T__dooW_`~$++~~$++~-

'*b(
+ 	 r$   rg   r@   s   @r%   ri   ri      s    %
r$   ri   c                   ^     e Zd Z fdZ ej
                         d fd	       Zd Zd Z xZ	S )LevitAttentionc                 ~   t         |           || _        |dz  | _        || _        || _        ||z  |z  ||z  dz  z   | _        ||z  |z  | _        t        || j                        | _	        t        j                         | _        t        | j                  |d      | _        t        t        j                   t#        |      t#        |                  }t%        |      }i g }	}|D ]W  }
|D ]P  }t'        |
d   |d   z
        t'        |
d   |d   z
        f}||vrt%        |      ||<   |	j)                  ||          R Y i | _        t,        j                  j/                  t-        j0                  |t%        |                  | _        | j5                  dt-        j6                  |	      j9                  ||      d       y )	N      rF   r   )r8   r   attention_bias_idxsF
persistent)r,   r-   num_attention_headsscalekey_dimattention_ratioout_dim_keys_valuesout_dim_projectionrZ   queries_keys_valuesr   rJ   
activation
projectionlist	itertoolsproductrangelenabsappendattention_bias_cacher   	Parameterzerosattention_biasesregister_buffer
LongTensorrn   )r2   rH   r|   rz   r}   rk   points
len_pointsattention_offsetsindicesp1p2offsetr9   s                r%   r-   zLevitAttention.__init__   s   #6 d]
.#2W#<?R#RU\_rUruvUv#v "1G";>Q"Q#1,@X@X#Y ,,.()@)@,_`ai''j(95;LMN[
%'7 	:B :bebem,c"Q%"Q%-.@A!22034E0F%f-089	:	: %'! % 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A*j#Yfk 	 	
r$   c                 R    t         |   |       |r| j                  ri | _        y y y r;   r,   trainr   r2   moder9   s     r%   r   zLevitAttention.train   )    dD--(*D% .4r$   c                     | j                   r| j                  d d | j                  f   S t        |      }|| j                  vr*| j                  d d | j                  f   | j                  |<   | j                  |   S r;   trainingr   rw   strr   r2   device
device_keys      r%   get_attention_biasesz#LevitAttention.get_attention_biases   t    ==((D,D,D)DEEVJ!:!::8<8M8MaQUQiQiNi8j))*5,,Z88r$   c                    |j                   \  }}}| j                  |      }|j                  ||| j                  d      j	                  | j
                  | j
                  | j                  | j
                  z  gd      \  }}}|j                  dddd      }|j                  dddd      }|j                  dddd      }||j                  dd      z  | j                  z  | j                  |j                        z   }	|	j                  d      }	|	|z  j                  dd      j                  ||| j                        }| j                  | j!                  |            }|S Nrm   r
   dimr   rF   r   )rT   r   rn   rz   splitr|   r}   permuterW   r{   r   r   softmaxro   r   r   r   )
r2   rf   rp   
seq_lengthrq   r   querykeyvalue	attentions
             r%   r=   zLevitAttention.forward   sN   $0$6$6!
J"66|D/44ZTMeMegijpp\\4<<)=)=)LMST q 
sE aAq)kk!Q1%aAq)CMM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!E)44Q:BB:z[_[r[rst|'DEr$   T
r   r   r   r-   r   no_gradr   r   r=   r?   r@   s   @r%   rt   rt      s.    
: U]]_+ +
9r$   rt   c                   ^     e Zd Z fdZ ej
                         d fd	       Zd Zd Z xZ	S )LevitAttentionSubsamplec	                 x   t         |           || _        |dz  | _        || _        || _        ||z  |z  ||z  z   | _        ||z  |z  | _        || _        t        || j                        | _
        t        ||      | _        t        |||z        | _        t        j                         | _        t        | j                  |      | _        i | _        t'        t)        j*                  t-        |      t-        |                  }	t'        t)        j*                  t-        |      t-        |                  }
t/        |	      t/        |
      }}i g }}|
D ]q  }|	D ]j  }d}t1        |d   |z  |d   z
  |dz
  dz  z         t1        |d   |z  |d   z
  |dz
  dz  z         f}||vrt/        |      ||<   |j3                  ||          l s t4        j                  j7                  t5        j8                  |t/        |                  | _        | j=                  dt5        j>                  |      jA                  ||      d       y )Nrv   r   r   rF   rw   Frx   )!r,   r-   rz   r{   r|   r}   r~   r   resolution_outrZ   keys_valuesri   queries_subsamplequeriesr   rJ   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rn   )r2   ra   rb   r|   rz   r}   r6   resolution_inr   r   points_r   len_points_r   r   r   r   sizer   r9   s                      r%   r-   z LevitAttentionSubsample.__init__   s1    	#6 d]
.#2W#<?R#RU\_rUr#r "1G";>Q"Q,))T5M5MN!/!F%i;N1NO,,.()@)@*M$&!i''m(<eM>RSTy((~)>n@UVW"%f+s7|K
%'7 	:B :befnr!u4qA~EFBqETZN]_`a]bLbfjmnfnrsesLsHtu!22034E0F%f-089:	: !& 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A+z#Zgl 	 	
r$   c                 R    t         |   |       |r| j                  ri | _        y y y r;   r   r   s     r%   r   zLevitAttentionSubsample.train  r   r$   c                     | j                   r| j                  d d | j                  f   S t        |      }|| j                  vr*| j                  d d | j                  f   | j                  |<   | j                  |   S r;   r   r   s      r%   r   z,LevitAttentionSubsample.get_attention_biases  r   r$   c                 L   |j                   \  }}}| j                  |      j                  ||| j                  d      j	                  | j
                  | j                  | j
                  z  gd      \  }}|j                  dddd      }|j                  dddd      }| j                  | j                  |            }|j                  || j                  dz  | j                  | j
                        j                  dddd      }||j                  dd      z  | j                  z  | j                  |j                        z   }|j                  d      }||z  j                  dd      j!                  |d| j"                        }| j%                  | j'                  |            }|S r   )rT   r   rn   rz   r   r|   r}   r   r   r   r   rW   r{   r   r   r   ro   r   r   r   )	r2   rf   rp   r   rq   r   r   r   r   s	            r%   r=   zLevitAttentionSubsample.forward  s~   $0$6$6!
J\*T*j$*B*BBGUDLL$"6"6"EFAUN 	U
 kk!Q1%aAq)T33LAB

:t':':A'=t?W?WY]YeYefnnq!Q
 CMM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!E)44Q:BB:rSWSjSjkt|'DEr$   r   r   r@   s   @r%   r   r      s/    +
Z U]]_+ +
9r$   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitMLPLayerzE
    MLP Layer with `2X` expansion in contrast to ViT with `4X`.
    c                     t         |           t        ||      | _        t	        j
                         | _        t        ||      | _        y r;   )r,   r-   rZ   	linear_upr   rJ   r   linear_down)r2   ra   
hidden_dimr9   s      r%   r-   zLevitMLPLayer.__init__0  s8    '	:>,,.)*i@r$   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r;   )r   r   r   re   s     r%   r=   zLevitMLPLayer.forward6  s4    ~~l3|4''5r$   r>   r@   s   @r%   r   r   +  s    Ar$   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitResidualLayerz"
    Residual Block for LeViT
    c                 >    t         |           || _        || _        y r;   )r,   r-   module	drop_rate)r2   r   r   r9   s      r%   r-   zLevitResidualLayer.__init__B  s    "r$   c                    | j                   r| j                  dkD  rt        j                  |j	                  d      dd|j
                        }|j                  | j                        j                  d| j                  z
        j                         }|| j                  |      |z  z   }|S || j                  |      z   }|S )Nr   r   )r   )
r   r   r   randr   r   ge_divdetachr   )r2   rf   rnds      r%   r=   zLevitResidualLayer.forwardG  s    ==T^^a/**\..q11a@S@STC''$..)--a$...@AHHJC'$++l*Cc*IIL'$++l*CCLr$   r>   r@   s   @r%   r   r   =  s    #
 r$   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )
LevitStagezP
    LeViT Stage consisting of `LevitMLPLayer` and `LevitAttention` layers.
    c                 
   t         |           g | _        || _        |
| _        t        |      D ]  }| j                  j                  t        t        |||||
      | j                  j                               |dkD  sO||z  }| j                  j                  t        t        ||      | j                  j                                |	d   dk(  r| j                  dz
  |	d   z  dz   | _        | j                  j                  t        | j                  j                  ||dz    |	d   |	d   |	d   |	d   |
| j                  d       | j                  | _        |	d   dkD  r| j                  j                  |dz      |	d   z  }| j                  j                  t        t        | j                  j                  |dz      |      | j                  j                               t        j                  | j                        | _        y )	Nr   	Subsampler      rF   r
   )r|   rz   r}   r6   r   r   rE   )r,   r-   layersrR   r   r   r   r   rt   drop_path_rater   r   r   rH   r   
ModuleList)r2   rR   idxrH   r|   depthsrz   r}   	mlp_ratiodown_opsr   rq   r   r9   s                r%   r-   zLevitStage.__init__W  s    	*v 	AKK""<:M`mnKK.. 1})I5
""&}\:'NPTP[P[PjPjk	 A;+%#'#5#5#9hqk"IA"MDKK'[[--cC!G<$QK(0$,QK#A;"/#'#6#6
 "&!4!4D{Q![[55cAg>!L
""&%dkk&>&>sQw&GTVZVaVaVpVp mmDKK0r$   c                     | j                   S r;   )r   )r2   s    r%   get_resolutionzLevitStage.get_resolution  s    !!!r$   c                 8    | j                   D ]
  } ||      } |S r;   )r   )r2   rf   layers      r%   r=   zLevitStage.forward  s%    [[ 	/E .L	/r$   )r   r   r   r   r-   r   r=   r?   r@   s   @r%   r   r   R  s    51n"r$   r   c                   *     e Zd ZdZ fdZddZ xZS )LevitEncoderzC
    LeViT Encoder consisting of multiple `LevitStage` stages.
    c                    t         |           || _        | j                  j                  | j                  j                  z  }g | _        | j                  j                  j                  dg       t        t        |j                              D ]  }t        |||j                  |   |j                  |   |j                  |   |j                  |   |j                  |   |j                   |   |j                  |   |
      }|j#                         }| j
                  j                  |        t%        j&                  | j
                        | _        y )N )r,   r-   rR   
image_size
patch_sizestagesr   r   r   r   r   r   rH   r|   rz   r}   r   r   r   r   )r2   rR   rk   	stage_idxstager9   s        r%   r-   zLevitEncoder.__init__  s   [[++t{{/E/EE
##RD)s6==12 	&I##I.y)i(**95&&y1  +	*E --/JKKu%	&  mmDKK0r$   c                     |rdnd }| j                   D ]  }|r||fz   } ||      } |r||fz   }|st        d ||fD              S t        ||      S )Nr#   c              3   &   K   | ]	  }||  y wr;   r#   ).0vs     r%   	<genexpr>z'LevitEncoder.forward.<locals>.<genexpr>  s     WqWs   )last_hidden_stater   )r   r"   r   )r2   rf   output_hidden_statesreturn_dictall_hidden_statesr   s         r%   r=   zLevitEncoder.forward  ss    "6BD[[ 	/E#$5$G! .L	/
   1\O CW\3D$EWWW-\mnnr$   )FTr>   r@   s   @r%   r   r     s    12or$   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitClassificationLayerz$
    LeViT Classification Layer
    c                     t         |           t        j                  |      | _        t        j
                  ||      | _        y r;   )r,   r-   r   r`   r1   r^   r_   )r2   ra   rb   r9   s      r%   r-   z!LevitClassificationLayer.__init__  s0    ..3ii	:6r$   c                 J    | j                  |      }| j                  |      }|S r;   )r1   r_   )r2   rf   r   s      r%   r=   z LevitClassificationLayer.forward  s#    |4\*r$   r>   r@   s   @r%   r   r     s    7
r$   r   c                   ,    e Zd ZU eed<   dZdZdgZd Zy)LevitPreTrainedModelrR   levitrX   r   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                  t        j                  f      rJ|j                  j
                  j                          |j                  j
                  j                  d       yy)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   r^   r.   weightdatanormal_rR   initializer_ranger+   zero_r`   r0   fill_)r2   r   s     r%   _init_weightsz"LevitPreTrainedModel._init_weights  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( ' @AKK""$MM$$S) Br$   N)	r   r   r   r   r!   base_model_prefixmain_input_name_no_split_modulesr	  r#   r$   r%   r   r     s!    $O-.
*r$   r   c                   x     e Zd Z fdZe	 	 	 ddeej                     dee   dee   de	e
ef   fd       Z xZS )
LevitModelc                     t         |   |       || _        t        |      | _        t        |      | _        | j                          y r;   )r,   r-   rR   rB   patch_embeddingsr   encoder	post_initrQ   s     r%   r-   zLevitModel.__init__  s:      4V <#F+r$   rX   r   r   returnc                 D   ||n| j                   j                  }||n| j                   j                  }|t        d      | j	                  |      }| j                  |||      }|d   }|j                  d      }|s
||f|dd  z   S t        |||j                        S )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputr   )	rR   r   use_return_dictrU   r  r  r   r   r   )r2   rX   r   r   r<   encoder_outputsr   pooled_outputs           r%   r=   zLevitModel.forward  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@**<8
,,!5# ' 
 ,A. *..1.5%}58KKK7/')77
 	
r$   NNN)r   r   r   r-   r   r   r   r    boolr   r"   r   r=   r?   r@   s   @r%   r  r    sk      59/3&*	!
u001!
 'tn!
 d^	!

 
u>>	?!
 !
r$   r  z
    Levit Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZe	 	 	 	 ddeej                     deej                     dee	   dee	   de
eef   f
d       Z xZS )	LevitForImageClassificationc                 >   t         |   |       || _        |j                  | _        t	        |      | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        | j                          y Nr   rm   )r,   r-   rR   
num_labelsr  r   r   rH   r   r   Identity
classifierr  rQ   s     r%   r-   z$LevitForImageClassification.__init__  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	 	r$   rX   labelsr   r   r  c                    ||n| j                   j                  }| j                  |||      }|d   }|j                  d      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }	| j                  dk(  r& |	|j                         |j                               }n |	||      }n| j                   j
                  dk(  r=t               }	 |	|j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               }	 |	||      }|s|f|d	d z   }
||f|
z   S |
S t!        |||j"                  
      S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationrm   rF   )lossr   r   )rR   r  r   r   r"  problem_typer   dtyper   longintr	   squeezer   rn   r   r   r   )r2   rX   r#  r   r   outputssequence_outputr   r(  loss_fctoutputs              r%   r=   z#LevitForImageClassification.forward/  s    &1%<k$++B]B]**\@Tbm*n!!*)..q11{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE3!//
 	
r$   )NNNN)r   r   r   r-   r   r   r   r    r   r  r   r"   r   r=   r?   r@   s   @r%   r  r    s       59-1/3&*3
u0013
 ))*3
 'tn	3

 d^3
 
u::	;3
 3
r$   r  ap  
    LeViT Model transformer with image classification heads on top (a linear layer on top of the final hidden state and
    a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet. .. warning::
           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                   x     e Zd Z fdZe	 	 	 ddeej                     dee   dee   de	e
ef   fd       Z xZS )&LevitForImageClassificationWithTeacherc                    t         |   |       || _        |j                  | _        t	        |      | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        | j                          y r  )r,   r-   rR   r   r  r   r   rH   r   r   r!  r"  classifier_distillr  rQ   s     r%   r-   z/LevitForImageClassificationWithTeacher.__init__o  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	   1$ %V%8%8%<f>O>OP""$ 	 	r$   rX   r   r   r  c                 .   ||n| j                   j                  }| j                  |||      }|d   }|j                  d      }| j	                  |      | j                  |      }}||z   dz  }|s|||f|dd  z   }	|	S t        ||||j                        S )Nr  r   r   rF   )r   r   r   r   )rR   r  r   r   r"  r5  r   r   )
r2   rX   r   r   r.  r/  r   distill_logitsr   r1  s
             r%   r=   z.LevitForImageClassificationWithTeacher.forward  s     &1%<k$++B]B]**\@Tbm*n!!*)..q1%)___%EtG^G^_nGoN
~-2j.9GABKGFM;! .!//	
 	
r$   r  )r   r   r   r-   r   r   r   r    r  r   r"   r   r=   r?   r@   s   @r%   r3  r3  f  sk    *  59/3&*	
u001
 'tn
 d^	

 
uBB	C
 
r$   r3  )r  r3  r  r   )/r   r   dataclassesr   typingr   r   r   torch.utils.checkpointr   torch.nnr   r   r	   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   configuration_levitr   
get_loggerr   loggerr   Moduler'   rB   rZ   ri   rt   r   r   r   r   r   r   r   r  r  r3  __all__r#   r$   r%   <module>rD     s     ! "    A A  . , , 
		H	% 
=; = =$")) ()5299 )5X	RYY 	RYY ;RYY ;|Pbii PfBII $   *B BJ+o299 +o\ryy   *? * *& +
% +
 +
\ E
"6 E
E
P /
-A /
/
dr$   