
    rhL                        d dl mZ d dlmZmZmZ d dlZd dlmZ ddlm	Z	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZmZmZmZ ddlmZ ddlmZ e ed       G d de                    Z  G d dejB                        Z" G d dejB                        Z# G d de      Z$ G d dejB                        Z% G d dejB                        Z& G d dejB                        Z'd  Z(dQd!Z)d"ejT                  d#e+d$ejT                  fd%Z,	 dRd&ejB                  d'ejT                  d(ejT                  d)ejT                  d*eejT                     d+e-d,e-d-ee   fd.Z. G d/ d0ejB                        Z/ G d1 d2ejB                        Z0 G d3 d4ejB                        Z1 G d5 d6e      Z2 G d7 d8ejB                        Z3 G d9 d:ejB                        Z4 G d; d<ejB                        Z5e G d= d>e             Z6 ed?       G d@ dAe6             Z7dBejT                  dCe+d)ee8e-e+f   d$ejT                  fdDZ9	 	 	 dSdEee+ejT                  f   dFee+ejT                  f   dGe8dHeejt                     dIeejv                     d$ejT                  fdJZ<dTdKejT                  dGe8d$ejT                  fdLZ= edM       G dN dOe6             Z>g dPZ?y)U    )	dataclass)CallableOptionalUnionN)nn   )ACT2CLSACT2FN)GradientCheckpointingLayer)BackboneOutput)ROPE_INIT_FUNCTIONS)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tuple	torch_int)check_model_inputs   )EfficientLoFTRConfiga  
    Base class for outputs of keypoint matching models. Due to the nature of keypoint detection and matching, the number
    of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the batch of
    images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask tensor is
    used to indicate which values in the keypoints, matches and matching_scores tensors are keypoint matching
    information.
    )custom_introc                       e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeeej                        ed<   y)KeypointMatchingOutputa  
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    hidden_states (`tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)`, returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`)
    attentions (`tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)`, returned when `output_attentions=True` is passed or when `config.output_attentions=True`)
    Nmatchesmatching_scores	keypointshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   tupler         /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/efficientloftr/modeling_efficientloftr.pyr   r   %   s|      ,0GXe''(/37OXe//07-1Ix))*18<M8E%"3"345<59Ju00129r*   r   c            
            e Zd Zddef fdZ ej                         	 ddej                  dee	ej                  ej                  f      de	ej                  ej                  f   fd       Z xZS )EfficientLoFTRRotaryEmbeddingconfigc                    t         |           || _        |j                  d   | _        t
        | j                     | _        | j                  | j                  |      \  }}|d d d d d f   j                         j                  dddd      }|j                  \  }}t        j                  ||      j                  d      j                         j                  d      }t        j                  ||      j                  d      j                         j                  d      }	t        j                  d||| j                  j                  dz        }
||z  |
d d d d d d dd df<   |	|z  |
d d d d d d dd df<   | j!                  d|
d       y )	N	rope_typer   r      inv_freqF)
persistent)super__init__r.   rope_scalingr0   r   rope_init_fnfloatexpandembedding_sizer%   onescumsum	unsqueezezeroshidden_sizeregister_buffer)selfr.   devicer3   _inv_freq_expandedembed_heightembed_width	i_indices	j_indicesemb	__class__s              r+   r6   z&EfficientLoFTRRotaryEmbedding.__init__H   sf   ,,[9/?''V<!$T4q%89??AHHAqRTU$*$9$9!kJJ|[9@@CIIKUUVXY	JJ|[9@@CIIKUUVXY	kk!\;8O8OST8TU&)::Aq!QTTM&)::Aq!QTTMZ?r*   xposition_idsreturnc                 L   t        |j                  j                  t              r/|j                  j                  dk7  r|j                  j                  nd}t	        j
                  |d      5  | j                  }|j                         }|j                         }d d d        j                  dd      }j                  dd      }|j                  |j                  |j                        }|j                  |j                  |j                        }||fS # 1 sw Y   xY w)	NmpscpuF)device_typeenabledr2   r1   dimrC   dtype)
isinstancerC   typestrr%   autocastr3   sincosrepeat_interleavetorW   )rB   rL   rM   rR   rJ   r\   r]   s          r+   forwardz%EfficientLoFTRRotaryEmbedding.forward[   s     (2!((--'E!((--[`J`ahhmmfk^^UC 	--C'')C'')C	
 ##A2#.##A2#.ffAHHAGGf4ffAHHAGGf4Cx	 	s   --DD#N)r!   r"   r#   r   r6   r%   no_gradTensorr   r(   
LongTensorr`   __classcell__rK   s   @r+   r-   r-   G   sz    @3 @& U]]_cg-5eE<L<LeN^N^<^6_-`	u||U\\)	* r*   r-   c                   &     e Zd Zd fd	Zd Z xZS )EfficientLoFTRConvNormLayerc                    t         |           t        j                  ||||||dz
  dz  n|d      | _        t        j
                  ||j                        | _        |t        j                         | _
        y t        |          | _
        y )Nr   r2   F)paddingbias)r5   r6   r   Conv2dconvBatchNorm2dbatch_norm_epsnormIdentityr	   
activation)	rB   r.   in_channelsout_channelskernel_sizestriderj   rr   rK   s	           r+   r6   z$EfficientLoFTRConvNormLayer.__init__p   sy    II.5o[1_*7
	 NN<1F1FG	+5+="++-7:CVCXr*   c                 l    | j                  |      }| j                  |      }| j                  |      }|S ra   )rm   rp   rr   )rB   hidden_states     r+   r`   z#EfficientLoFTRConvNormLayer.forward}   s2    yy.yy.|4r*   NN)r!   r"   r#   r6   r`   re   rf   s   @r+   rh   rh   o   s    Yr*   rh   c                   h     e Zd ZdZdededef fdZdej                  dej                  fdZ	 xZ
S )	EfficientLoFTRRepVGGBlockzk
    RepVGG architecture block introduced by the work "RepVGG: Making VGG-style ConvNets Great Again".
    r.   	stage_idx	block_idxc                    t         |           |j                  |   |   }|j                  |   |   }|j                  |   |   }|j
                  }t        |||d|d      | _        t        |||d|d      | _        ||k(  r|dk(  rt        j                  |      nd | _        |t        j                         | _        y t        |   | _        y )Nr   r   ru   rv   rj   r   )r5   r6   stage_block_in_channelsstage_block_out_channelsstage_block_strideactivation_functionrh   conv1conv2r   rn   identityrq   r
   rr   )	rB   r.   r|   r}   rs   rt   rv   rr   rK   s	           r+   r6   z"EfficientLoFTRRepVGGBlock.__init__   s    44Y?	J66yA)L**95i@//
0K1VUV

 1K1VUV

 8Cl7RW]abWb{3hl+5+="++-6*CUr*   r   rN   c                     | j                   | j                  |      }nd}| j                  |      | j                  |      z   |z   }| j                  |      }|S )Nr   )r   r   r   rr   )rB   r   identity_outs      r+   r`   z!EfficientLoFTRRepVGGBlock.forward   sU    ==$==7LL

=1DJJ}4MMP\\6r*   )r!   r"   r#   r$   r   intr6   r%   rc   r`   re   rf   s   @r+   r{   r{      sC    V3 V VPS VU\\ ell r*   r{   c                   `     e Zd Zdedef fdZdej                  dej                  fdZ xZ	S )EfficientLoFTRRepVGGStager.   r|   c                     t         |           t        j                  g       | _        t        |j                  |         D ](  }| j                  j                  t        |||             * y ra   )	r5   r6   r   
ModuleListblocksrangestage_num_blocksappendr{   )rB   r.   r|   r}   rK   s       r+   r6   z"EfficientLoFTRRepVGGStage.__init__   s`    mmB'v66yAB 	IKK)	r*   r   rN   c                 8    | j                   D ]
  } ||      } |S ra   )r   )rB   r   blocks      r+   r`   z!EfficientLoFTRRepVGGStage.forward   s%    [[ 	1E!-0M	1r*   
r!   r"   r#   r   r   r6   r%   rc   r`   re   rf   s   @r+   r   r      s2    
3 
 
U\\ ell r*   r   c                   b     e Zd Zdef fdZdej                  deej                     fdZ xZ	S )EfficientLoFTRepVGGr.   c                     t         |           t        j                  g       | _        t        t        |j                              D ])  }t        ||      }| j                  j                  |       + y ra   )
r5   r6   r   r   stagesr   lenstage_strider   r   )rB   r.   r|   stagerK   s       r+   r6   zEfficientLoFTRepVGG.__init__   s[    mmB's6#6#678 	&I-fi@EKKu%	&r*   r   rN   c                 h    g }| j                   D ]  } ||      }|j                  |        |dd  }|S Nr   )r   r   )rB   r   outputsr   s       r+   r`   zEfficientLoFTRepVGG.forward   sB    [[ 	*E!-0MNN=)	*
 !"+r*   )
r!   r"   r#   r   r6   r%   rc   listr`   re   rf   s   @r+   r   r      s0    &3 &U\\ d5<<6H r*   r   c            	            e Zd Zdef fdZ	 ddej                  deej                     deej                  ej                  f   fdZ	 xZ
S )EfficientLoFTRAggregationLayerr.   c           	      L   t         |           |j                  }t        j                  |||j
                  d|j                  d|      | _        t        j                  j                  |j                  |j                        | _        t        j                  |      | _        y )Nr   F)ru   rj   rv   rk   groups)ru   rv   )r5   r6   r@   r   rl   q_aggregation_kernel_sizeq_aggregation_strideq_aggregationr%   	MaxPool2dkv_aggregation_kernel_sizekv_aggregation_stridekv_aggregation	LayerNormrp   )rB   r.   r@   rK   s      r+   r6   z'EfficientLoFTRAggregationLayer.__init__   s    ((YY88..
 $hh0099&B^B^ 1 
 LL-	r*   r   encoder_hidden_statesrN   c                     |}|d u}|r|n|}| j                  |      }| j                  |      }|j                  dddd      }|j                  dddd      }| j                  |      }| j                  |      }||fS )Nr   r2   r   r   )r   r   permuterp   )rB   r   r   query_statesis_cross_attention	kv_statess         r+   r`   z&EfficientLoFTRAggregationLayer.forward   s    
 %2$>-?)]	)),7''	2	#++Aq!Q7%%aAq1			,/ $		) 4333r*   ra   )r!   r"   r#   r   r6   r%   rc   r   r(   r`   re   rf   s   @r+   r   r      sU    .3 ., 9=4||4  (54 
u||U\\)	*	4r*   r   c                     | dd d df   }| ddd df   }t        j                  | |gd      j                  d      }|S )N.r2   r   r1   rT   )r%   stackflatten)rL   x1x2rot_xs       r+   rotate_halfr      sL    	
3!8B	
319BKK"b	r*2226ELr*   c                 6   | j                   }| j                         } |j                         }|j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }|j	                  |      |j	                  |      fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )rW   )rW   r9   r>   r   r_   )	qkr]   r\   rM   unsqueeze_dimrW   q_embedk_embeds	            r+   apply_rotary_pos_embr      s    ( GGE		A		A
--
&C
--
&C3w;q>C/0G3w;q>C/0G::E:"GJJUJ$;;;r*   r   n_reprN   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shaper:   reshape)r   r   batchnum_key_value_headsslenhead_dims         r+   	repeat_kvr     so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr*   modulequerykeyvalueattention_maskscalingdropoutkwargsc                 T   t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
|#|d d d d d d d |j
                  d   f   }|
|z   }
t        j                  j                  |
dt        j                        j                  |j                        }
t        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr2   r   r   r1   )rU   rW   )ptrainingr   )r   num_key_value_groupsr%   matmul	transposer   r   
functionalsoftmaxfloat32r_   rW   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r+   eager_attention_forwardr   &  s    3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!$Q1.D
0@0@0D.D%DE#k1==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r*   c                        e Zd ZdZdedef fdZ	 	 ddej                  de	ej                     de	e
ej                  ej                  f      dee   d	e
ej                  e	ej                     f   f
d
Z xZS )EfficientLoFTRAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr.   	layer_idxc                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr   g      Trk   )r5   r6   r.   r   getattrr@   num_attention_headsr   r   r   r   attention_dropout	is_causalr   Linearattention_biasq_projk_projv_projo_projrB   r.   r   rK   s      r+   r6   z EfficientLoFTRAttention.__init__D  sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r*   r   r   position_embeddingsr   rN   c                    |j                   \  }}}|j                   d d }| j                  |      j                  ||d|      }	|d u}
|
r|n|}| j                  |      j                  ||d|      }| j	                  |      j                  ||d| j
                        j                  dd      }||\  }}t        |	|||d      \  }	}|	j                  ||d| j
                        j                  dd      }	|j                  ||d| j
                        j                  dd      }t        }| j                  j                  dk7  rt        | j                  j                     } || |	||fd | j                  sdn| j                  | j                  d|\  }} |j                  g |d j!                         }| j#                  |      }||fS )Nr1   r   r2   )r   eager        )r   r   r   )r   r   viewr   r   r   r   r   r   r.   _attn_implementationr   r   r   r   r   r   r   )rB   r   r   r   r   
batch_sizeseq_lenrU   input_shaper   r   current_statesr   r   r]   r\   attention_interfacer   r   s                      r+   r`   zEfficientLoFTRAttention.forward[  s    $1#6#6 
GS#))#2.{{=166z7BPST2$>2D.-[[055j'2sS
{{>277
GRQUQ^Q^_iijkmno**HC';L*VY[^no'p$L*#((Wb$--PZZ[\^_`__Z"dmmLVVWXZ[\
(?;;++w6"9$++:Z:Z"[$7		%

  #}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r*   ry   )r!   r"   r#   r$   r   r   r6   r%   rc   r   r(   r   r   r`   re   rf   s   @r+   r   r   @  s    G
3 
 
4 9=KO	*)||*)  (5*) &eELL%,,,F&GH	*)
 +,*) 
u||Xell33	4*)r*   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EfficientLoFTRMLPr.   c                 0   t         |           |j                  }|j                  }t	        j
                  |dz  |d      | _        t        |j                     | _	        t	        j
                  ||d      | _
        t	        j                  |      | _        y )Nr2   Fr   )r5   r6   r@   intermediate_sizer   r   fc1r
   mlp_activation_functionrr   fc2r   
layer_normrB   r.   r@   r   rK   s       r+   r6   zEfficientLoFTRMLP.__init__  su    (("4499[1_.?eL !?!?@99.%H,,{3r*   r   rN   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S ra   )r  rr   r  r  )rB   r   s     r+   r`   zEfficientLoFTRMLP.forward  s@    /6/6r*   )	r!   r"   r#   r   r6   r%   rc   r`   re   rf   s   @r+   r   r     s+    43 4U\\ ell r*   r   c                        e Zd Zdedef fdZ	 	 d
dej                  deej                     dee	ej                  ej                  f      de
e   dej                  f
d	Z xZS )!EfficientLoFTRAggregatedAttentionr.   r   c                     t         |           |j                  | _        t        |      | _        t        ||      | _        t        |      | _        y ra   )	r5   r6   r   r   aggregationr   	attentionr   mlpr   s      r+   r6   z*EfficientLoFTRAggregatedAttention.__init__  sC    )/)I)I&9&A0C$V,r*   r   r   r   r   rN   c                 \   |j                   \  }}}}| j                  ||      \  }}	|j                   \  }}
}}|j                  |d|      }|	j                  |d|      }	 | j                  ||	fd|i|\  }}|j	                  ddd      }|j                  |||
|      }t
        j                  j                  j                  || j                  dd      }t        j                  ||gd	      }|j	                  ddd
d      }| j                  |      }|j	                  dd
dd      }||z   }|S )Nr1   r   r   r2   r   bilinearFscale_factormodealign_cornersrT   r   )r   r
  r   r  r   r%   r   r   interpolater   catr  )rB   r   r   r   r   r   	embed_dimrD   aggregated_hidden_states aggregated_encoder_hidden_statesaggregated_haggregated_wr   intermediate_statesoutput_statess                  r+   r`   z)EfficientLoFTRAggregatedAttention.forward  so    '4&9&9#
Iq! FJEUEU0F
B "B ,D+I+I(<q $<#C#CJPRT]#^ +K+S+ST^`bdm+n('$,
 !4
 	
Q "))!Q2!))*i|\hh))55d&D&D:ej 6 
 $ii(D!L199!Q1E!45%--aAq9%5r*   ry   )r!   r"   r#   r   r   r6   r%   rc   r   r(   r   r   r`   re   rf   s   @r+   r  r    s    -3 - - 9=KO	'||'  (5' &eELL%,,,F&GH	'
 +,' 
'r*   r  c                        e Zd Zdedef fdZdej                  deej                  ej                  f   de	e
   dej                  fdZ xZS )	*EfficientLoFTRLocalFeatureTransformerLayerr.   r   c                 f    t         |           t        ||      | _        t        ||      | _        y ra   )r5   r6   r  self_attentioncross_attentionr   s      r+   r6   z3EfficientLoFTRLocalFeatureTransformerLayer.__init__  s,    ?	R@Sr*   r   r   r   rN   c                 B   |j                   \  }}}}}|j                  d|||      } | j                  |fd|i|}|j                  dd|||      }	|	j                  d      }	|	j                  d|||      }	 | j                  ||	fi |}|j                  |d|||      }|S )Nr1   r   r2   r   )r   r   r  flipr   )
rB   r   r   r   r   rD   r  heightwidthr   s
             r+   r`   z2EfficientLoFTRLocalFeatureTransformerLayer.forward  s     3@2E2E/
Ay&%%--b)VUK+++MmObmflm - 5 5b!YPU V 5 : :1 = 5 = =b)VUZ [,,,]<Q\U[\%--j"iQVWr*   )r!   r"   r#   r   r   r6   r%   rc   r(   r   r   r`   re   rf   s   @r+   r  r    se    T3 T T|| #5<<#=> +,	
 
r*   r  c                        e Zd Zdef fdZdej                  deej                  ej                  f   dee	   dej                  fdZ
 xZS )%EfficientLoFTRLocalFeatureTransformerr.   c           	          t         |           t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        y c c}w )N)r   )r5   r6   r   r   r   num_attention_layersr  layers)rB   r.   irK   s      r+   r6   z.EfficientLoFTRLocalFeatureTransformer.__init__  sM    mm v::; ;6QO
s   Ar   r   r   rN   c                 >    | j                   D ]  } ||fd|i|} |S )Nr   )r)  )rB   r   r   r   layers        r+   r`   z-EfficientLoFTRLocalFeatureTransformer.forward  s5     [[ 	dE!-cEXc\bcM	dr*   )r!   r"   r#   r   r6   r%   rc   r(   r   r   r`   re   rf   s   @r+   r&  r&    s[    
3 
|| #5<<#=> +,	
 
r*   r&  c                   |     e Zd Zdededef fdZdej                  dej                  dej                  fdZ xZ	S )	EfficientLoFTROutConvBlockr.   r@   r   c                 N   t         |           t        j                  ||dddd      | _        t        j                  ||dddd      | _        t        j                  |      | _        t        |j                            | _
        t        j                  ||dddd      | _        y )Nr   r   Fru   rv   rj   rk   r   )r5   r6   r   rl   	out_conv1	out_conv2rn   
batch_normr	   r  rr   	out_conv3r  s       r+   r6   z#EfficientLoFTROutConvBlock.__init__  s    ;0AqYZdelqr0aST[`
 ..):;!&"@"@AC#4kqYZdelqrr*   r   residual_statesrN   c                     | j                  |      }||z   }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }t
        j                  j                  |ddd      }|S Ng       @r  Fr  )r1  r2  r3  rr   r4  r   r   r  )rB   r   r5  s      r+   r`   z"EfficientLoFTROutConvBlock.forward
  s|    ..9)M9..9///:///:..9--33#Je 4 
 r*   r   rf   s   @r+   r.  r.    sL    	s3 	s# 	sZ] 	s
U\\ 
ELL 
UZUaUa 
r*   r.  c                        e Zd Zdef fdZdej                  deej                     dej                  fdZdej                  deej                     de	ej                  ej                  f   fd	Z
 xZS )
EfficientLoFTRFineFusionLayerr.   c                 p   t         |           |j                  | _        |j                  }t	        j
                  |d   |d   dddd      | _        t	        j                         | _        t        dt        |            D ]3  }t        |||   ||dz
           }| j                  j                  |       5 y )Nr   r   Fr0  )r5   r6   fine_kernel_sizefine_fusion_dimsr   rl   out_convr   out_conv_layersr   r   r.  r   )rB   r.   r<  r*  r=  rK   s        r+   r6   z&EfficientLoFTRFineFusionLayer.__init__  s     & 7 7!22		Q!1!!4!AWX_d
  "}}q#./0 	2A1&:J1:MO_`ade`eOfgH  ''1	2r*   r   r5  rN   c                     | j                  |      }t        j                  j                  |ddd      }t	        | j
                        D ]  \  }} ||||         } |S r7  )r=  r   r   r  	enumerater>  )rB   r   r5  r*  r,  s        r+   forward_pyramidz-EfficientLoFTRFineFusionLayer.forward_pyramid&  so    
 m411*E 2 
 "$"6"67 	EHAu!-1CDM	E r*   coarse_featuresresidual_featuresc                    |j                   \  }}}}}|j                  d|||      }t        t        |            }| j	                  ||      }|j                   \  }}	}
}|j                  |d|	|
|      }|dddf   }|dddf   }t        |
|z        }t        j                  j                  || j                  |d      }|j                   \  }}}|j                  |d| j                  dz  |      }|j                  dddd      }t        j                  j                  || j                  dz   |d      }|j                  |d| j                  dz   dz  |      }|j                  dddd      }||fS )a?  
        For each image pair, compute the fine features of pixels.
        In both images, compute a patch of fine features center cropped around each coarse pixel.
        In the first image, the feature patch is kernel_size large and long.
        In the second image, it is (kernel_size + 2) large and long.
        r1   r2   Nr   r   r   r   )r   r   r   reversedrA  r   r   r   unfoldr;  r   )rB   rB  rC  r   rD   r  coarse_heightcoarse_widthfine_featuresfine_embed_dimfine_height
fine_widthfine_features_0fine_features_1rv   r   s                   r+   r`   z%EfficientLoFTRFineFusionLayer.forward4  s    AP@U@U=
Ay-)11"iP\] *;!<= ,,_>OP5B5H5H2>;
%--j!^[Zde'1-'1- [M12--..)>)>vWX / 
 (--1g)11*b$BWBWYZBZ\cd)11!Q1=--..)>)>)B6[\ / 
 *11*b4CXCX[\C\abBbdkl)11!Q1=//r*   )r!   r"   r#   r   r6   r%   rc   r   rA  r(   r`   re   rf   s   @r+   r9  r9    s    23 2|| ell+ 
	'0'0  -'0 
u||U\\)	*	'0r*   r9  c                       e Zd ZdZeZdZdZdZdZ	dZ
eedZdej                  ddfd	Zdej$                  dej$                  fd
Zy)EfficientLoFTRPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    efficientloftrpixel_valuesT)r   r    r   rN   Nc                 6   t        |t        j                  t        j                  t        j                  t        j
                  f      rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsr   )meanstdNg      ?)rX   r   r   rl   Conv1drn   weightdatanormal_r.   initializer_rangerk   zero_r   fill_)rB   r   s     r+   _init_weightsz+EfficientLoFTRPreTrainedModel._init_weightsp  s    fryy"))RYYOPMM&&CT[[5R5R&S{{&  &&( '-KK""$MM$$S) .r*   c                 :    |dddddddf   dddddddf   S )aK  
        Assuming pixel_values has shape (batch_size, 3, height, width), and that all channels values are the same,
        extract the first channel value to get a tensor of shape (batch_size, 1, height, width) for EfficientLoFTR. This is
        a workaround for the issue discussed in :
        https://github.com/huggingface/transformers/pull/25786#issuecomment-1730176446

        Args:
            pixel_values: torch.FloatTensor of shape (batch_size, 3, height, width)

        Returns:
            pixel_values: torch.FloatTensor of shape (batch_size, 1, height, width)

        Nr   r)   )rB   rR  s     r+    extract_one_channel_pixel_valuesz>EfficientLoFTRPreTrainedModel.extract_one_channel_pixel_values{  s$     Aq!QJ'4A66r*   )r!   r"   r#   r$   r   config_classbase_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpar{   r   _can_record_outputsr   Moduler]  r%   r&   r_  r)   r*   r+   rP  rP  ^  so    
 (L($O&*#N2-
*BII *$ *7U=N=N 7SXSdSd 7r*   rP  za
    EfficientLoFTR model taking images as inputs and outputting the features of the images.
    c                        e Zd Zdef fdZee	 ddej                  de	ej                     dee   defd              Z xZS )	EfficientLoFTRModelr.   c                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        | j                          y )N)r.   )
r5   r6   r.   r   backboner&  local_feature_transformerr-   
rotary_emb	post_initrB   r.   rK   s     r+   r6   zEfficientLoFTRModel.__init__  sF     +F3)Nv)V&7vFr*   rR  labelsr   rN   c                    |t        d      |j                  dk7  s|j                  d      dk7  rt        d      |j                  \  }}}}}|j	                  |dz  |||      }| j                  |      }| j                  |      }	|	d   }
|	dd }|
j                  dd \  }}}| j                  |
      \  }}|j                  |dz  ddd      j	                  |dz  d|      }|j                  |dz  ddd      j	                  |dz  d|      }||f}|
j	                  |d|||      }
 | j                  |
fd	|i|}
|
ft        |      z   }	t        |	
      S )  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoModel
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "https://github.com/magicleap/SuperGluePretrainedNetwork/blob/master/assets/phototourism_sample_images/london_bridge_78916675_4568141288.jpg?raw=true"
        >>> image1 = Image.open(requests.get(url, stream=True).raw)
        >>> url = "https://github.com/magicleap/SuperGluePretrainedNetwork/blob/master/assets/phototourism_sample_images/london_bridge_19481797_2295892421.jpg?raw=true"
        >>> image2 = Image.open(requests.get(url, stream=True).raw)
        >>> images = [image1, image2]

        >>> processor = AutoImageProcessor.from_pretrained("zju-community/efficient_loftr")
        >>> model = AutoModel.from_pretrained("zju-community/efficient_loftr")

        >>> with torch.no_grad():
        >>>     inputs = processor(images, return_tensors="pt")
        >>>     outputs = model(**inputs)
        ```Nz>EfficientLoFTR is not trainable, no labels should be provided.   r   r2   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)r1   r   )feature_maps)
ValueErrorndimsizer   r   r_  rk  rm  r:   rl  r(   r   )rB   rR  rp  r   r   rD   channelsr#  r$  featuresrB  rC  coarse_embed_dimrG  rH  r]   r\   r   s                     r+   r`   zEfficientLoFTRModel.forward  s   < ]^^!\%6%6q%9Q%>noo1=1C1C.
Ax#++JNHfeT<<\J ==."2,$SbM8G8M8Mbc8R5- ???3SjjaR4<<Z!^RQabjjaR4<<Z!^RQab"Cj)11*aAQS`bno8$88
1D
HN
 $%.?(@@844r*   ra   )r!   r"   r#   r   r6   r   r   r%   r&   r   rd   r   r   r   r`   re   rf   s   @r+   ri  ri    sn    3   .2;5'';5 ))*;5 +,	;5
 
;5  ;5r*   ri  tensorborder_marginc                 l    |dk  r| S || ddd|d|d|d|f<   || dd| d| d| d| df<   | S )a  
    Mask a tensor border with a given value

    Args:
        tensor (`torch.Tensor` of shape `(batch_size, height_0, width_0, height_1, width_1)`):
            The tensor to mask
        border_margin (`int`) :
            The size of the border
        value (`Union[bool, int, float]`):
            The value to place in the tensor's borders

    Returns:
        tensor (`torch.Tensor` of shape `(batch_size, height_0, width_0, height_1, width_1)`):
            The masked tensor
    r   Nr)   )r|  r}  r   s      r+   mask_borderr    s`      PUF1n}nn}nn}nn}nLMTYF1}no~-PQMr*   r#  r$  normalized_coordinatesrC   rW   c                 T   t        j                  d|dz
  |||      }t        j                  d| dz
  | ||      }|r||dz
  z  dz
  dz  }|| dz
  z  dz
  dz  }t        j                  t        j                  ||d      d	      }|j	                  ddd      j                  d      }|S )
aK  
    Copied from kornia library : kornia/kornia/utils/grid.py:26

    Generate a coordinate grid for an image.

    When the flag ``normalized_coordinates`` is set to True, the grid is
    normalized to be in the range :math:`[-1,1]` to be consistent with the pytorch
    function :py:func:`torch.nn.functional.grid_sample`.

    Args:
        height (`int`):
            The image height (rows).
        width (`int`):
            The image width (cols).
        normalized_coordinates (`bool`):
            Whether to normalize coordinates in the range :math:`[-1,1]` in order to be consistent with the
            PyTorch function :py:func:`torch.nn.functional.grid_sample`.
        device (`torch.device`):
            The device on which the grid will be generated.
        dtype (`torch.dtype`):
            The data type of the generated grid.

    Return:
        grid (`torch.Tensor` of shape `(1, height, width, 2)`):
            The grid tensor.

    Example:
        >>> create_meshgrid(2, 2)
        tensor([[[[-1., -1.],
                  [ 1., -1.]],
        <BLANKLINE>
                 [[-1.,  1.],
                  [ 1.,  1.]]]])

        >>> create_meshgrid(2, 2, normalized_coordinates=False)
        tensor([[[[0., 0.],
                  [1., 0.]],
        <BLANKLINE>
                 [[0., 1.],
                  [1., 1.]]]])

    r   r   rV         ?r2   ij)indexingr1   rT   )r%   linspacer   meshgridr   r>   )r#  r$  r  rC   rW   xsysgrids           r+   create_meshgridr    s    b 
519eF%	HB	6A:vfE	JBEAI$)FQJ#%*;;u~~b"t<"ED<<1a **1-DKr*   inputc                    | j                   \  }}}}t        |||| j                        }|j                  | j                        }|d   j                  d      }|d   j                  d      }| j                  ||d      }	t        j                  ||	z  dd      }
t        j                  ||	z  dd      }t        j                  ||
gd      }|j                  ||d      S )aL  
    Copied from kornia library : kornia/geometry/subpix/dsnt.py:76
    Compute the expectation of coordinate values using spatial probabilities.

    The input heatmap is assumed to represent a valid spatial probability distribution,
    which can be achieved using :func:`~kornia.geometry.subpixel.spatial_softmax2d`.

    Args:
        input (`torch.Tensor` of shape `(batch_size, embed_dim, height, width)`):
            The input tensor representing dense spatial probabilities.
        normalized_coordinates (`bool`):
            Whether to return the coordinates normalized in the range of :math:`[-1, 1]`. Otherwise, it will return
            the coordinates in the range of the input shape.

    Returns:
        output (`torch.Tensor` of shape `(batch_size, embed_dim, 2)`)
            Expected value of the 2D coordinates. Output order of the coordinates is (x, y).

    Examples:
        >>> heatmaps = torch.tensor([[[
        ... [0., 0., 0.],
        ... [0., 0., 0.],
        ... [0., 1., 0.]]]])
        >>> spatial_expectation2d(heatmaps, False)
        tensor([[[1., 2.]]])

    .r   r1   .r   Tkeepdimr2   )
r   r  rC   r_   rW   r   r   r%   sumr  )r  r  r   r  r#  r$  r  pos_xpos_y
input_flat
expected_y
expected_xoutputs                r+   spatial_expectation2dr  /  s    8 ,1;;(J	65 65*@%,,OD775;;DL  $EL  $EJ	26J 5:-r4@J5:-r4@JYY
J/4F;;z9a00r*   z[
    EfficientLoFTR model taking images as inputs and outputting the matching of them.
    c                       e Zd ZdZdef fdZdej                  deej                  ej                  f   fdZ	dej                  de
deej                  ej                  ej                  f   fd	Zd
ej                  dej                  dede
deej                  ej                  f   f
dZdej                  dej                  d
ej                  dede
dej                  fdZdej                  dej                  dej                  de
dej                  f
dZee	 ddej$                  deej(                     dee   defd              Z xZS )!EfficientLoFTRForKeypointMatchinga  EfficientLoFTR dense image matcher

    Given two images, we determine the correspondences by:
      1. Extracting coarse and fine features through a backbone
      2. Transforming coarse features through self and cross attention
      3. Matching coarse features to obtain coarse coordinates of matches
      4. Obtaining full resolution fine features by fusing transformed and backbone coarse features
      5. Refining the coarse matches using fine feature patches centered at each coarse match in a two-stage refinement

    Yifan Wang, Xingyi He, Sida Peng, Dongli Tan and Xiaowei Zhou.
    Efficient LoFTR: Semi-Dense Local Feature Matching with Sparse-Like Speed
    In CVPR, 2024. https://arxiv.org/abs/2403.04765
    r.   c                     t         |   |       || _        t        |      | _        t        |      | _        | j                          y ra   )r5   r6   r.   ri  rQ  r9  refinement_layerrn  ro  s     r+   r6   z*EfficientLoFTRForKeypointMatching.__init__s  s;     1&9 =f Er*   scoresrN   c                    |j                   \  }}}}}|j                  |||z  ||z        }|j                  dd      j                  }|j                  dd      j                  }|| j                  j
                  kD  }	|	j                  |||||      }	t        |	| j                  j                  d      }	|	j                  |||z  ||z        }	|	||k(  z  ||k(  z  }	||	z  }
|
j                  d      \  }}|
j                  d      \  }}t        j                  ||g      j                  |dd      }t        j                  ||gd      }t        j                  |dkD  |d      }||fS )	ai  
        Based on a keypoint score matrix, compute the best keypoint matches between the first and second image.
        Since each image pair can have different number of matches, the matches are concatenated together for all pair
        in the batch and a batch_indices tensor is returned to specify which match belong to which element in the batch.

        Note:
            This step can be done as a postprocessing step, because does not involve any model weights/params.
            However, we keep it in the modeling code for consistency with other keypoint matching models AND for
            easier torch.compile/torch.export (all ops are in torch).

        Args:
            scores (`torch.Tensor` of shape `(batch_size, height_0, width_0, height_1, width_1)`):
                Scores of keypoints

        Returns:
            matched_indices (`torch.Tensor` of shape `(2, num_matches)`):
                Indices representing which pixel in the first image matches which pixel in the second image
            matching_scores (`torch.Tensor` of shape `(num_matches,)`):
                Scores of each match
        r2   Tr  r   Fr1   rT   r   )r   r   maxvaluesr.   coarse_matching_thresholdr   r  coarse_matching_border_removalr%   r  r   where)rB   r  r   height0width0height1width1max_0max_1maskmasked_scoresmatching_scores_0max_indices_0matching_scores_1max_indices_1matching_indicesr   s                    r+   _get_matches_from_scoresz:EfficientLoFTRForKeypointMatching._get_matches_from_scores|  s{   * 8>||4
GVWfZ6)97V;KL 

1d
+22

1d
+22 === ||J&I4!K!KUS||J&(8'F:JK v'6U?; +8+<+<Q+?(=+8+<+<Q+?(= 99m]%CDLLZYZ\^_++'8:K&LRST !;;':<LbQ00r*   rB  coarse_scalec                 v   |j                   \  }}}}}|j                  ddddd      }|j                  |dd|      }||j                   d   dz  z  }|dddf   }|dddf   }	||	j                  dd	      z  }
|
| j                  j
                  z  }
| j                  j                  r|
}nAt        j                  j                  |
d      t        j                  j                  |
d      z  }|j                  |||||      }| j                  |      \  }}t        j                  ||z  ||z  gd
      |z  }|||fS )a  
        For each image pair, compute the matching confidence between each coarse element (by default (image_height / 8)
        * (image_width / 8 elements)) from the first image to the second image.

        Note:
            This step can be done as a postprocessing step, because does not involve any model weights/params.
            However, we keep it in the modeling code for consistency with other keypoint matching models AND for
            easier torch.compile/torch.export (all ops are in torch).

        Args:
            coarse_features (`torch.Tensor` of shape `(batch_size, 2, hidden_size, coarse_height, coarse_width)`):
                Coarse features
            coarse_scale (`float`): Scale between the image size and the coarse size

        Returns:
            keypoints (`torch.Tensor` of shape `(batch_size, 2, num_matches, 2)`):
                Keypoints coordinates.
            matching_scores (`torch.Tensor` of shape `(batch_size, 2, num_matches)`):
                The confidence matching score of each keypoint.
            matched_indices (`torch.Tensor` of shape `(batch_size, 2, num_matches)`):
                Indices which indicates which keypoint in an image matched with which keypoint in the other image. For
                both image in the pair.
        r   r   r      r2   r1   r  Nr   rT   )r   r   r   r   r.   coarse_matching_temperaturecoarse_matching_skip_softmaxr   r   r   r   r  r%   r   )rB   rB  r  r   rD   r  r#  r$  coarse_features_0coarse_features_1
similarity
confidencematched_indicesr   r   s                  r+   _coarse_matchingz2EfficientLoFTRForKeypointMatching._coarse_matching  sM   4 3B2G2G/
Ay&% *11!Q1a@)11*aYO)O,A,A",E,LL+AqD1+AqD1&):)D)DR)LL
$++"I"II
;;33#J..z1=@U@UV`bc@ddJ__ZN
+/+H+H+T(KK5!8/U:R SY[\_kk	/?::r*   fine_confidencecoarse_matched_keypointsfine_window_size
fine_scalec           
      &   |j                   \  }}}}t        |dz        }|j                  ||d      }t        j                  |d      \  }	}
|
d   }
|
|z  }|
|z  }t        ||d|j                  |j                        }||dz  z
  dz   }|j                  dddd      j                  ||dd      }t        j                  |d|j                  d      j                  dddd            j                  d      }t        j                  |d|j                  d      j                  dddd            j                  d      }|d	d	d
f   ||z  z   }|d	d	df   ||z  z   }t        j                  ||gd      }
t        j                  ||gd      }|
|fS )a  
        For each coarse pixel, retrieve the highest fine confidence score and index.
        The index represents the matching between a pixel position in the fine window in the first image and a pixel
        position in the fine window of the second image.
        For example, for a fine_window_size of 64 (8 * 8), the index 2474 represents the matching between the index 38
        (2474 // 64) in the fine window of the first image, and the index 42 in the second image. This means that 38
        which corresponds to the position (4, 6) (4 // 8 and 4 % 8) is matched with the position (5, 2). In this example
        the coarse matched coordinate will be shifted to the matched fine coordinates in the first and second image.

        Note:
            This step can be done as a postprocessing step, because does not involve any model weights/params.
            However, we keep it in the modeling code for consistency with other keypoint matching models AND for
            easier torch.compile/torch.export (all ops are in torch).

        Args:
            fine_confidence (`torch.Tensor` of shape `(num_matches, fine_window_size, fine_window_size)`):
                First stage confidence of matching fine features between the first and the second image
            coarse_matched_keypoints (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Coarse matched keypoint between the first and the second image.
            fine_window_size (`int`):
                Size of the window used to refine matches
            fine_scale (`float`):
                Scale between the size of fine features and coarse features

        Returns:
            indices (`torch.Tensor` of shape `(2, num_matches, 1)`):
                Indices of the fine coordinate matched in the fine window
            fine_matches (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Coordinates of matched keypoints after the first fine stage
        r  r1   rT   .NF)r  rC   rW   r2   r   Nr   )r   r   r   r%   r  r  rC   rW   r:   gatherr>   squeezer   )rB   r  r  r  r  r   num_keypointsrD   r;  r  indices	indices_0	indices_1r  delta_0delta_1fine_matches_0fine_matches_1fine_matchess                      r+   _get_first_stage_fine_matchingz@EfficientLoFTRForKeypointMatching._get_first_stage_fine_matching  s   J +:*?*?'
M1a$%5s%:;)11*mRP))O<)$//	..	#("))!''
 '1,-3||Aq"a(//
M2rR,,tQ	(;(;B(?(F(Fr2rST(UV^^_`a,,tQ	(;(;B(?(F(Fr2rST(UV^^_`a1!Q$7'J:NN1!Q$7'J:NN++y)4!<{{NN#CK$$r*   r  r  c                    |j                   \  }}}}t        |dz        }	|dddf   }
|dddf   }||	z  }||	z  }t        j                  ||
j                        j                  |ddd      }t        j                  ||
j                        j                  d|dd      }|
d   }
|d   }|d   }t        ddd|
j                  	      j                  t        j                        }|d
   }||d   z   }||d   z   }|j                  ||||	dz   |	dz         }||||
||f   }|j                  ||d      }t        j                  j                  || j                  j                  z  d      }|j                  ||dd      }t        |d      d   }|dddf   }|dddf   |dz  |z  z   }t        j                  ||gd      }|S )a  
        For the given position in their respective fine windows, retrieve the 3x3 fine confidences around this position.
        After applying softmax to these confidences, compute the 2D spatial expected coordinates.
        Shift the first stage fine matching with these expected coordinates.

        Note:
            This step can be done as a postprocessing step, because does not involve any model weights/params.
            However, we keep it in the modeling code for consistency with other keypoint matching models AND for
            easier torch.compile/torch.export (all ops are in torch).

        Args:
            indices (`torch.Tensor` of shape `(batch_size, 2, num_keypoints)`):
                Indices representing the position of each keypoint in the fine window
            fine_matches (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Coordinates of matched keypoints after the first fine stage
            fine_confidence (`torch.Tensor` of shape `(num_matches, fine_window_size, fine_window_size)`):
                Second stage confidence of matching fine features between the first and the second image
            fine_window_size (`int`):
                Size of the window used to refine matches
            fine_scale (`float`):
                Scale between the size of fine features and coarse features

        Returns:
            fine_matches (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Coordinates of matched keypoints after the second fine stage
        r  Nr   r   )rC   r  r   T)r  rC   )N.r  r  r2   	   r1   rT   )r   r   r%   arangerC   r   r  r_   longr   r   r   r.   !fine_matching_regress_temperaturer  r   )rB   r  r  r  r  r  r   r  rD   r;  r  r  indices_1_iindices_1_jbatch_indicesmatches_indicesdeltaheatmapfine_coordinates_normalizedr  r  s                        r+   _get_second_stage_fine_matchingzAEfficientLoFTRForKeypointMatching._get_second_stage_fine_matching&  s   D +:*?*?'
M1a$%5s%:;AqDM	AqDM	#33"22 Z	8H8HIQQR\^_abdef,,}Y=M=MNVVWXZgijlmni(	!),!),1T)JZJZ[^^_d_i_iji !E&M1!E&M1)11'79IA9MO_bcOc
 *-)U`bm*mn)11*mQO--//dkkKKKQS 0 
 "))*mQJ&;GT&J1&M#%ad+%ad+/Jf/UXb/bc{{NN#CKr*   rM  rN  c                    |j                   \  }}}}| j                  j                  }	t        |dz        }
t	        j
                  |||	z
  d      }t	        j
                  |||	z
  d      }|d   }|d   }||j                   d   dz  z  }||j                   d   dz  z  }||j                  dd      z  }t        j                  j                  |d      t        j                  j                  |d      z  }|j                  ||||
dz   |
dz         }|dddddf   }|j                  ||||      }| j                  ||||      \  }}|d   }|d   }||	dz  z  }||j                  dd      z  }| j                  |||||      }|S )a  
        For each coarse pixel with a corresponding window of fine features, compute the matching confidence between fine
        features in the first image and the second image.

        Fine features are sliced in two part :
        - The first part used for the first stage are the first fine_hidden_size - config.fine_matching_slicedim (64 - 8
         = 56 by default) features.
        - The second part used for the second stage are the last config.fine_matching_slicedim (8 by default) features.

        Each part is used to compute a fine confidence tensor of the following shape :
        (batch_size, (coarse_height * coarse_width), fine_window_size, fine_window_size)
        They correspond to the score between each fine pixel in the first image and each fine pixel in the second image.

        Args:
            fine_features_0 (`torch.Tensor` of shape `(num_matches, fine_kernel_size ** 2, fine_kernel_size ** 2)`):
                Fine features from the first image
            fine_features_1 (`torch.Tensor` of shape `(num_matches, (fine_kernel_size + 2) ** 2, (fine_kernel_size + 2)
            ** 2)`):
                Fine features from the second image
            coarse_matched_keypoints (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Keypoint coordinates found in coarse matching for the first and second image
            fine_scale (`int`):
                Scale between the size of fine features and coarse features

        Returns:
            fine_coordinates (`torch.Tensor` of shape `(2, num_matches, 2)`):
                Matched keypoint between the first and the second image. All matched keypoints are concatenated in the
                second dimension.

        r  r1   r   r   r   r2   .)r   r.   fine_matching_slice_dimr   r%   splitr   r   r   r   r   r  r  )rB   rM  rN  r  r  r   r  r  rJ  r  r;  split_fine_features_0split_fine_features_1r  first_stage_fine_confidencefine_indicesr  second_stage_fine_confidencefine_coordinatess                      r+   _fine_matchingz0EfficientLoFTRForKeypointMatching._fine_matchingq  s   J GVF[F[C
M#3^"&++"E"E$%5s%:; !&O^Ne=egi j %O^Ne=egi j 02/2 *O,A,A",E,LL)O,A,A",E,LL *O,E,Eb",MM--//CbmmF[F[\kmnFoo)11'79IA9MO_bcOc
 *#qtQrT/:&5&=&='79I'
# &*%H%H'$	&
"l 02/2 *,CS,HH (79R9RSUWY9Z'Z$??(
  r*   rR  rp  r   c                    |t        d       | j                  |fi |}|j                  }|d   }|j                  dd \  }}}	|j                  \  }
}}}}||z  }| j	                  ||      \  }}}|dd }| j                  ||      \  }}|j                  \  }}}t        j                  |
      d   }|||dddf   f   }|||dddf   f   }t        ||z        }||z  }| j                  ||||      }|dddddddf   |z  |dddddddf<   |dddddddf   |z  |dddddddf<   t        ||||j                  |j                        S )rr  Nz9SuperGlue is not trainable, no labels should be provided.r   rt  r   r  )r   r   r   r   r    )rv  rQ  ru  r   r  r  r%   r  r   r  r   r   r    )rB   rR  rp  r   model_outputsrz  rB  r{  rG  rH  r   rD   ry  r#  r$  r  coarse_keypointscoarse_matching_scorescoarse_matched_indicesrC  rM  rN  r  r  rK  r  matching_keypointss                              r+   r`   z)EfficientLoFTRForKeypointMatching.forward  s   < XYY )<(;(;L(SF(S -- #1+8G8M8Mbc8R5-1=1C1C.
Ax-KOK`K`\L
H02H
 %QRL+/+@+@Rc+d( 5::1mZ0;)-9OPQSTPT9U*UV)-9OPQSTPT9U*UV   <=k)
!00/Sceop);Aq!QJ)G%)O1aA:&);Aq!QJ)G&)P1aA:&%*2('55$//
 	
r*   ra   )r!   r"   r#   r$   r   r6   r%   rc   r(   r  r9   r  r   r  r  r  r   r   r&   r   rd   r   r   r   r`   re   rf   s   @r+   r  r  _  s   3 31u|| 31ellTYT`T`F`@a 31j1;$||1;;@1;	u||U\\5<<7	81;f@%@% #(,,@% 	@%
 @% 
u||U\\)	*@%DII llI 	I
 I I 
IVZ Z  Z  #(,,	Z 
 Z  
Z x  .2D
''D
 ))*D
 +,	D

 
 D
  D
r*   r  )rP  ri  r  r   )r   )FNN)T)@dataclassesr   typingr   r   r   r%   r   activationsr	   r
   modeling_layersr   modeling_outputsr   modeling_rope_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   r   utils.genericr   configuration_efficientloftrr   r   rg  r-   rh   r{   r   r   r   r   r   rc   r   r   r9   r   r   r   r  r  r&  r.  r9  rP  ri  boolr  rC   rW   r  r  r  __all__r)   r*   r+   <module>r     s1   " , ,   * 9 . 6 F &  0 > :[ : :0$BII $P")) * : <		 &")) *#4RYY #4N<>	UU\\ 	U# 	U%,, 	U( %II%<<% 
% <<	%
 U\\*% % % '(%4E)bii E)P		 $0		 0f1K 8BII * 2D0BII D0N *7O *7 *7Z 
H57 H5
H5V S tUTWGWAX ]b]i]i 6 $)%)#'8#u||#$8ell"#8 !8 U\\"	8
 EKK 8 \\8v-1 -1t -1W\WcWc -1` 
o
(E o

o
d hr*   