
    rh                     *   d dl mZ d dlmZmZmZ d dlZd dlZd dlm	Z	 d dl
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ e ed       G d de                    Z G d de	j@                        Z!d Z"d@dZ#dejH                  de%dejH                  fdZ&	 dAde	j@                  dejH                  dejH                  d ejH                  d!eejH                     d"e'd#e'd$ee   fd%Z( G d& d'e	j@                        Z) G d( d)e	j@                        Z* G d* d+e	j@                        Z+d,ejH                  d-ejH                  d.ejH                  dejH                  fd/Z, G d0 d1e	j@                        Z- G d2 d3e	j@                        Z.e G d4 d5e             Z/d6ejH                  d7e'de0ejH                  ejH                  f   fd8Z1d9ejH                  d:e%d;e%dejH                  fd<Z2 ed=       G d> d?e/             Z3d5d?gZ4y)B    )	dataclass)CallableOptionalUnionN)nnpad_sequence   )ACT2FN)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstring)can_return_tuple   )AutoModelForKeypointDetection   )LightGlueConfiga  
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    )custom_introc                   ^   e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeeej                        ed	<   dZeeej                        ed
<   y)LightGlueKeypointMatchingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    Nlossmatchesmatching_scores	keypointsprunemaskhidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   	IntTensorr    r!   tupler"        /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/lightglue/modeling_lightglue.pyr   r   &   s    0 )-D(5$$
%,+/GXe''(/37OXe//07-1Ix))*1'+E8EOO$+(,D(5$$
%,8<M8E%"3"345<59Ju00129r-   r   c                        e Zd Zdef fdZ	 ddej                  dee   de	e
ej                     e
ej                  ej                  f   f   fdZ xZS )LightGluePositionalEncoderconfigc                     t         |           t        j                  d|j                  |j
                  z  dz  d      | _        y )Nr   Fbias)super__init__r   Lineardescriptor_dimnum_attention_heads	projectorselfr1   	__class__s     r.   r6   z#LightGluePositionalEncoder.__init__T   s:    1f&;&;v?Y?Y&Y]^&^ejkr-   r   output_hidden_statesreturnc                     | j                  |      }|j                  dd      }t        j                  |      }t        j                  |      }||f}|r||f}|S |f}|S )Nr   dim)r:   repeat_interleaver'   cossin)r<   r   r>   projected_keypoints
embeddingscosinessinesoutputs           r.   forwardz"LightGluePositionalEncoder.forwardX   sq     #nnY7(::1":E
))J'		*%u%
6J*12 R\P]r-   F)r#   r$   r%   r   r6   r'   Tensorr   boolr   r+   rL   __classcell__r=   s   @r.   r0   r0   S   sb    l l
 OT		=Ed^		uU\\"E%,,*D$EE	F	r-   r0   c                     | dd d df   }| ddd df   }t        j                  | |gd      j                  d      }|S )N.r   r   rA   rB   )r'   stackflatten)xx1x2rot_xs       r.   rotate_halfrZ   d   sL    	
3!8B	
319BKK"b	r*2226ELr-   c                 6   | j                   }| j                         } |j                         }|j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }|j	                  |      |j	                  |      fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    dtype)r]   float	unsqueezerZ   to)	qkrE   rF   position_idsunsqueeze_dimr]   q_embedk_embeds	            r.   apply_rotary_pos_embrg   l   s    ( GGE		A		A
--
&C
--
&C3w;q>C/0G3w;q>C/0G::E:"GJJUJ$;;;r-   r!   n_repr?   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shapeexpandreshape)r!   rh   batchnum_key_value_headsslenhead_dims         r.   	repeat_kvrq      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr-   modulequerykeyvalueattention_maskscalingdropoutkwargsc                 T   t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
|#|d d d d d d d |j
                  d   f   }|
|z   }
t        j                  j                  |
dt        j                        j                  |j                        }
t        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr   r
   rS   rA   )rC   r]   )ptrainingr   )rq   num_key_value_groupsr'   matmul	transposerj   r   
functionalsoftmaxfloat32r`   r]   rx   r|   
contiguous)rr   rs   rt   ru   rv   rw   rx   ry   
key_statesvalue_statesattn_weightscausal_maskattn_outputs                r.   eager_attention_forwardr      s    3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!$Q1.D
0@0@0D.D%DE#k1==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r-   c                   T    e Zd ZdZdedef fdZ	 	 	 	 ddej                  de	e
ej                  ej                  f      de	ej                     de	ej                     d	e	ej                     d
ee   de
ej                  e	ej                     e	e
ej                        f   fdZ xZS )LightGlueAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr1   	layer_idxc                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nrp   g      Tr3   )r5   r6   r1   r   getattrhidden_sizer9   rp   rn   r}   rw   attention_dropout	is_causalr   r7   attention_biasq_projk_projv_projo_projr<   r1   r   r=   s      r.   r6   zLightGlueAttention.__init__   sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r-   r!   position_embeddingsrv   encoder_hidden_statesencoder_attention_maskry   r?   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	|d u}
|
r|n|}|
r|n|}| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }||\  }}t        |	|||      \  }	}t        }| j                  j                  dk7  rt        | j                  j                     } || |	|||f| j                  sdn| j                  | j                  d|\  }} |j                  g |d j!                         }| j#                  |      }||fS )NrA   r   r   eager        )rx   rw   )rj   rp   r   viewr   r   r   rg   r   r1   _attn_implementationr   r|   r   rw   rl   r   r   )r<   r!   r   rv   r   r   ry   input_shapehidden_shapequery_statesis_cross_attentioncurrent_statescurrent_attention_maskr   r   rE   rF   attention_interfacer   r   s                       r.   rL   zLightGlueAttention.forward   s    $))#2.88b8$--8{{=166|DNNqRST2$>2D.-;M!7Sa[[055lCMMaQRS
{{>277EOOPQSTU**HC';L*VY[^'_$L*(?;;++w6"9$++:Z:Z"[$7"	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r-   )NNNN)r#   r$   r%   r&   r   intr6   r'   rN   r   r+   r   r   rL   rP   rQ   s   @r.   r   r      s    G
 
3 
4 LP158<9=*)||*) &eELL%,,,F&GH*) !.	*)
  (5*) !) 6*) -.*) 
u||Xell3XeELL>Q5RR	S*)r-   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueMLPr1   c                 h   t         |           || _        t        |j                     | _        t        j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _        t        j                  |j                  d      | _        y )NT)elementwise_affine)r5   r6   r1   r   
hidden_actactivation_fnr   r7   intermediate_sizefc1r   fc2	LayerNorm
layer_normr;   s     r.   r6   zLightGlueMLP.__init__   s}    #F$5$5699V55v7O7OP99V55v7I7IJ,,v'?'?TXYr-   r!   r?   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S N)r   r   r   r   )r<   r!   s     r.   rL   zLightGlueMLP.forward   sB    /6**=9/r-   	r#   r$   r%   r   r6   r'   rN   rL   rP   rQ   s   @r.   r   r      s,    Z ZU\\ ell r-   r   c                        e Zd Zdedef fdZ	 	 ddej                  dej                  dej                  dee	   dee	   d	e
ej                  ee
ej                        ee
ej                        f   fd
Z xZS )LightGlueTransformerLayerr1   r   c                     t         |           t        ||      | _        t	        |      | _        t        ||      | _        t	        |      | _        y r   )r5   r6   r   self_attentionr   self_mlpcross_attention	cross_mlpr   s      r.   r6   z"LightGlueTransformerLayer.__init__	  sD    0C$V,1&)D%f-r-   descriptorsr   rv   r>   output_attentionsr?   c                    |rdnd }|rdnd }|r||fz   }|j                   \  }}	}
| j                  ||||      \  }}t        j                  ||gd      }| j	                  |      }||z   }|r||f}|j                  dd|	|
      j                  d      j                  ||	|
      }|6|j                  dddd|	      j                  d      j                  |dd|	      nd }| j                  ||||      \  }}t        j                  ||gd      }| j                  |      }||z   }|r6||f}||j                  ||	|
      fz   z   |j                  ||	|
      fz   |z   }|r
||fz   |fz   }|||fS )Nr,   )r   rv   r   rA   rB   r   r   )r   r   r   )	rj   r   r'   catr   rl   flipr   r   )r<   r   r   rv   r>   r   all_hidden_statesall_attentions
batch_sizenum_keypointsr8   attention_outputself_attentionsintermediate_statesoutput_statesself_attention_descriptorsself_attention_hidden_statesr   r   cross_attention_outputcross_attentionscross_intermediate_statescross_output_statescross_attention_hidden_statess                           r.   rL   z!LightGlueTransformerLayer.forward  s	    #7BD0d 1[N B4?4E4E1
M> -1,?,? ))/	 -@ -
)/ $ii6F(GRP&9:%0=%@",?+O( '..r1m^TT!WWZ? 	 ) ""2q!Q>CCAFNNz[\^_ano 	 483G3G&"7#9/	 4H 4
0 0 %*II/IKa.bhj$k!"nn-FG03FF-FH[,\)!-55j-Q_`bc./ &&z=.QST 0	0  +.@@DTCVVN-~==r-   )FF)r#   r$   r%   r   r   r6   r'   rN   r   rO   r+   rL   rP   rQ   s   @r.   r   r     s    . .3 . 05,1H>\\H> <<H> 	H>
 'tnH> $D>H> 
u||XeELL&9:HU5<<EX<YY	ZH>r-   r   
similaritymatchability0matchability1c                    | j                   \  }}}t        j                  j                  |      t        j                  j                  |      j	                  dd      z   }t        j                  j                  | d      }t        j                  j                  | j	                  dd      j                         d      j	                  dd      }| j                  ||dz   |dz   fd      }	||z   |z   |	ddd|d|f<   t        j                  j                  |j                  d             |	dddddf<   t        j                  j                  |j                  d             |	dddddf<   |	S )z;create the log assignment matrix from logits and similarityr   r   rA   rS   r   N)	rj   r   r   
logsigmoidr   log_softmaxr   new_fullsqueeze)
r   r   r   r   num_keypoints_0num_keypoints_1certaintiesscores0scores1scoress
             r.   sigmoid_log_double_softmaxr   [  sS    4>3C3C0J--**=9BMM<T<TUb<c<m<mnoqr<ssKmm''
A6Gmm''
(<(<R(D(O(O(QSTU__`bdfgG  *o.A?UVCV!WYZ[F4;g4E4SF1 0 00111=3H3H3L2LMF1crc2:11=3H3H3L2LMF1b#2#:Mr-   c                        e Zd Zdef fdZdej                  dej                  dej                  fdZdej                  dej                  fdZ xZ	S )LightGlueMatchAssignmentLayerr1   c                     t         |           |j                  | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  dd      | _        y )NTr3   r   )r5   r6   r8   r   r7   final_projectionmatchabilityr;   s     r.   r6   z&LightGlueMatchAssignmentLayer.__init__k  sY    $33 "		$*=*=t?R?RY] ^IId&9&914Hr-   r   r    r?   c                    |j                   \  }}}| j                  |      }|t        j                  | j                  |j
                        dz  z  }|j                  |dz  d||      }|d d df   }|d d df   }||j                  dd      z  }	||j                  |dz  d|      }|d d df   j                  d      }
|d d df   j                  d      j                  dd      }|
|z  }|	j                  |dk(  t        j                  |	j                        j                        }	| j                  |      }|j                  |dz  d|d      }|d d df   }|d d df   }t        |	||      }|S )Ndeviceg      ?r   r   r   rA   rS   )rj   r   r'   tensorr8   r   rl   r   r_   masked_fillfinfor]   minr   r   )r<   r   r    r   r   r8   m_descriptorsm_descriptors0m_descriptors1r   mask0mask1r   matchability_0matchability_1r   s                   r.   rL   z%LightGlueMatchAssignmentLayer.forwardr  s   4?4E4E1
M>--k:%T5H5HQ^QeQe(fjn(nn%--jAoq-Q_`&q!t,&q!t,#n&>&>r2&FF
<<
aMBDAJ((,EAJ((,66r2>E5=D#//	5;;zGWGW;X;\;\]J ((5#++J!OQqQ%ad+%ad+ ,JWr-   c                     | j                  |      }t        j                  j                  |      j	                  d      }|S )z0Get matchability of descriptors as a probabilityrA   )r   r   r   sigmoidr   )r<   r   r   s      r.   get_matchabilityz.LightGlueMatchAssignmentLayer.get_matchability  s7    ((5}},,\:BB2Fr-   )
r#   r$   r%   r   r6   r'   rN   rL   r   rP   rQ   s   @r.   r   r   j  sR    I I5<< u||  4ELL U\\ r-   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueTokenConfidenceLayerr1   c                 l    t         |           t        j                  |j                  d      | _        y Nr   )r5   r6   r   r7   r8   tokenr;   s     r.   r6   z&LightGlueTokenConfidenceLayer.__init__  s&    YYv44a8
r-   r   r?   c                     | j                  |j                               }t        j                  j	                  |      j                  d      }|S )NrA   )r   detachr   r   r   r   )r<   r   r   s      r.   rL   z%LightGlueTokenConfidenceLayer.forward  s=    

;--/0%%e,44R8r-   r   rQ   s   @r.   r   r     s*    9 9
5<< ELL r-   r   c                   0    e Zd ZU dZeed<   dZdZdZdZ	dZ
y)LightGluePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r1   	lightgluepixel_valuesFTN)r#   r$   r%   r&   r   r)   base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpar,   r-   r.   r   r     s+    
 #$O&+#Nr-   r   r   	thresholdc                 6   | j                   \  }}}| ddddddf   j                  d      }| ddddddf   j                  d      }|j                  }|j                  }t        j                  |j                   d   |j
                        d   }t        j                  |j                   d   |j
                        d   }	||j                  d|      k(  }
|	|j                  d|      k(  }|j                  j                         }|j                  d      }t        j                  |
||      }t        j                  ||j                  d|      |      }|
||kD  z  }||j                  d|      z  }t        j                  ||d      }t        j                  ||d      }t        j                  ||g      j                  dd      j                  |dz  d      }t        j                  ||g      j                  dd      j                  |dz  d      }||fS )z1obtain matches from a score matrix [Bx M+1 x N+1]NrA   r   r   r   r   )rj   maxindicesr'   aranger   gathervaluesexp
new_tensorwhererT   r   rl   )r   r  r   _max0max1matches0matches1indices0indices1mutual0mutual1zeromatching_scores0matching_scores1valid0valid1r   r   s                      r.   get_matches_from_scoresr    s   ||J1!SbS#2#+""1%D!SbS#2#+""1%D||H||H ||HNN1-hooFtLH||HNN1-hooFtLH(//!X66G(//!X66G ;;??D??1D{{7D$7{{7,<,C,CAx,PRVW(945Fv}}Q11F {{68R0H{{68R0Hkk8X./99!Q?GG
UVXZ[Gkk#35E"FGQQRSUVW__`jmn`nprsOO##r-   r   heightwidthc                     t        j                  ||g| j                  | j                        d   }|dz  }|j	                  d      j
                  dz  }| |ddddf   z
  |d   z  } | S )a  
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    r   r]   Nr   rA   .).NN)r'   r   r   r]   r  r  )r   r  r   sizeshiftscales         r.   normalize_keypointsr&    sp     <<	0@0@	XY]^D1HEHHRL!#EU3a<00E/4JJIr-   zV
    LightGlue model taking images as inputs and outputting the matching of them.
    c                       e Zd ZdZdef fdZdedefdZ	 d"de	j                  de	j                  d	ee   dee	j                  ee	j                  e	j                  f   f   fd
Zde	j                  dede	j                  de	j                  de	j                  f
dZd#dZde	j                  de	j                  dede	j                  fdZde	j                  de	j                  de	j                  de	j                  de	j                  de	j                  defdZd Zde	j                  de	j                  de	j                  de	j                  dee	j                  e	j                  f   f
dZ	 	 	 d$de	j                  de	j                  dedede	j                  dee   d	ee   dee	j                  e	j                  e	j                  eef   fdZee	 	 	 d$de	j0                  d ee	j2                     dee   d	ee   deeef   f
d!              Z xZS )%LightGlueForKeypointMatchingan  
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of :
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://arxiv.org/pdf/2306.13643.pdf
    r1   c           	      ,   t         |   |       t        j                  |j                  |j
                        | _        |j                  j                  | _        |j                  | _	        |j                  | _        |j                  | _        |j                  | _        |j                  | _        | j                  | j                  k7  r2t        j                   | j                  | j                  d      | _        nt        j$                         | _        t'        |      | _        t        j*                  t-        |j                        D cg c]  }t/        ||       c}      | _        t        j*                  t-        |j                        D cg c]  }t3        |       c}      | _        t        j*                  t-        |j                  dz
        D cg c]  }t7        |       c}      | _        | j;                          y c c}w c c}w c c}w )N)trust_remote_codeTr3   )r   r   )r5   r6   r   from_configkeypoint_detector_configr*  keypoint_detectordescriptor_decoder_dim keypoint_detector_descriptor_dimr8   num_hidden_layers
num_layersfilter_thresholddepth_confidencewidth_confidencer   r7   input_projectionIdentityr0   positional_encoder
ModuleListranger   transformer_layersr   match_assignment_layersr   token_confidence	post_init)r<   r1   ir  r=   s       r.   r6   z%LightGlueForKeypointMatching.__init__  s    !>!J!J++v?W?W"
 170O0O0f0f-$33 22 & 7 7 & 7 7 & 7 7$"G"GG$&IId.S.SUYUhUhos$tD!$&KKMD!"<V"D"$--EJ6KcKcEde&v;e#
 (*}}<A&BZBZ<[\q*62\(
$ !#<A&BZBZ]^B^<_`q*62`!
 	 f ] as   HHHlayer_indexr?   c                     ddt        j                  d|z  | j                  z        z  z   }t        j                  |dd      S )z-scaled confidence threshold for a given layerg?g?g      r   r   )npr  r1  clip)r<   r?  r  s      r.   _get_confidence_thresholdz6LightGlueForKeypointMatching._get_confidence_threshold  s;    #tk'9DOO'K LLL	wwy!Q''r-   r   r   r>   c                     |j                         j                         }| j                  |      }| j                  ||      }||fS )Nr>   )r   r   r5  r7  )r<   r   r   r>   projected_descriptorskeypoint_encoding_outputs         r.   _keypoint_processingz1LightGlueForKeypointMatching._keypoint_processing  sO     "((*557 $ 5 5k B#'#:#:9[o#:#p $&>>>r-   keypoint_confidencesr    
num_pointsc                 |   |j                   \  }}|| j                  dz
  k  ru|j                  |dk(  d      }|j                  |dz  d      }| j	                  |      }d||k  j                         j                  d      |z  z
  }|| j                  kD  }	|	S t        j                  |t        j                        }	|	S )zRevaluate whether we should stop inference based on the confidence of the keypointsr   r   r   rA   g      ?rB   r\   )rj   r1  r   rl   rC  r^   sumr3  r'   onesrO   )
r<   rI  r?  r    rJ  r   r  r  ratio_confidentearly_stopped_pairss
             r.   _get_early_stopped_image_pairsz;LightGlueForKeypointMatching._get_early_stopped_image_pairs%  s     


A1,, $8#C#CDAIq#Q #7#?#?
aQS#T 66{CI!%9I%E$L$L$N$R$RWX$R$Y\f$ffO"1D4I4I"I
 #" #(**Zuzz"J""r-   c                     |
||   }||   } | j                   |   ||      }t        || j                        \  }}||fS r   )r;  r  r2  )r<   r   r    r?  early_stopsr   r   r   s           r.   _get_keypoint_matchingz3LightGlueForKeypointMatching._get_keypoint_matching8  sW    "%k2K$D:--k:;M#:64CXCX#Y ''r-   confidencesr   c                 \    |d| j                   z
  kD  }|||| j                  |      k  z  }|S )z#mask points which should be removedr   )r4  rC  )r<   rT  r   r?  keeps        r.   _get_pruning_maskz.LightGlueForKeypointMatching._get_pruning_mask@  s<    T2223"K4#A#A+#NNNDr-   r	  prune_outputc                    |j                   \  }}	}	| j                  |   j                  |      }
| j                  ||
|      j	                  |dk(  t        j                  d            fd||d   |d   |fD        \  }}}}}t        |      D ]  }||||   fxx   dz  cc<    d ||||fD        \  }}}}||f}t        |dd      }|||||fS )	z
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        r   Fc              3   n   K   | ]&  }t        |      D cg c]
  \  }}||    c}} ( y c c}}w wr   )zip).0r   tr    pruned_keypoints_masks       r.   	<genexpr>zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>[  s9      c
 %(0E$FGDQtWGc
Gs   5/5r   c              3   6   K   | ]  }t        |d         yw)T)batch_firstNr   )r\  pruned_tensors     r.   r_  zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>c  s$      S
 D99S
s   TrA   ra  padding_value)	rj   r;  r   rW  r   r'   r   r9  r	   )r<   r   r   r    r	  rX  rI  r?  r   r  descriptors_matchabilitypruned_descriptorspruned_keypoints_0pruned_keypoints_1pruned_maskpruned_indicesr>  pruned_keypointsr^  s                     @r.   _do_layer_keypoint_pruningz7LightGlueForKeypointMatching._do_layer_keypoint_pruningG  s+    ',,
Aq#'#?#?#L#]#]^i#j  $ 6 67KMegr s 5 A A$!)U\\Z_M` ac
&	!ilDY[bcc
_.0BKQ_ z" 	4AN1--.!3.	4S
"46HJ\^i!jS
O.0BK /0BC%n$VXY!#3^[R^^^r-   c                     t        j                        d ||fD        \  }}d ||fD        \  }}fd||||fD        \  }}}}||||fS )Nc              3   8   K   | ]  }t        |d d        yw)TrA   rc  Nr   r\  r   s     r.   r_  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>u  s$      3
 TDD3
   c              3   8   K   | ]  }t        |d d        yw)Tr   rc  Nr   ro  s     r.   r_  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>y  s$      >
 TCC>
rp  c              3   (   K   | ]	  }|     y wr   r,   )r\  r   early_stops_indicess     r.   r_  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>}  s!      g
 &'g
s   )r'   rT   )r<   rs  final_pruned_keypoints_indices!final_pruned_keypoints_iterationsr   r   s    `    r.   _concat_early_stopped_outputsz:LightGlueForKeypointMatching._concat_early_stopped_outputsl  s     $kk*=>3
"$BC3
//>
*,MN>
::g
 .1	g
c"@Bc ./PRY[jjjr-   r   r   r   c                    |j                   \  }fd|||fD        \  }}}|d d df   }|d d df   }|d d df   }|d d df   }	|d d df   }
|d d df   }t        j                  dz  d|fd|j                  |j                        }t        j
                  dz  d|f|j                  |j                        }t        dz        D ]  }t        j                  ||   dk(  d||   j                  d||   j                  d                  ||d||   f<   t        j                  |	|   dk(  d||   j                  d|	|   j                  d                  ||d||   f<   |
|   ||d||   f<   ||   ||d||   f<    ||fS )Nc              3   J   K   | ]  }|j                  d z  d d        yw)r   rA   N)rl   )r\  r   r   s     r.   r_  zJLightGlueForKeypointMatching._do_final_keypoint_pruning.<locals>.<genexpr>  s'      -
7=FNN:?Ar2-
s    #r   r   r   rA   r"  )r   )
rj   r'   fullr   r]   zerosr9  r  r  clamp)r<   r	  r   r   r   r  r  r  r  r  r  r  _matches_matching_scoresr>  r   s                  @r.   _do_final_keypoint_pruningz7LightGlueForKeypointMatching._do_final_keypoint_pruning  s     
A-
BI7TcAd-
)/ 1a4=1a4=1a4=1a4=*1a40*1a40 ::zQ=A2gnndkdqdqr ;;1_a/oNcNc
 zQ' 	FA*/++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' +0++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' 3C12EQ8A;./2B12EQ8A;./	F )))r-   r  r   r   c           
      &	  ( |rdnd }|rdnd }	|j                   d   dk(  rT|j                   d d }
|j                  |
dt        j                        |j	                  |
      |j	                  |
      ||	fS |j
                  }|j                   \  }}}}t        j                  |j                  |d      d      }|j                  |dz  |d      }||j                  |dz  |      nd }|j                  |dz  || j                        }t        j                  |dz  |      }t        |||      }| j                  |||	      \  }}|d   }| j                  dkD  }| j                  dkD  }g }g }g }g }g }t        j                  d||      j                  |dz  d      }t        j                  |      }t!        | j"                        D ]3  }|j%                         }|| j'                  ||      }n&t        j(                  ||d
   f|j
                        } | j*                  |   |||||      }|\  }}} |r||z   }|r|	| z   }	|r|| j"                  dz
  k  r+ | j,                  |   |      }!| j/                  |!|||      }"n%t        j(                  |t        j0                        }"t        j2                  |"      r|"j5                  d      (|(   }#| j7                  |||(      \  }$}%|j9                  t;        |#             |j9                  t;        |$             |j9                  t;        |%             |r:|j9                  t;        |(                |j9                  t;        |(                ||"    }t=        (fd||d   |d   ||fD              \  }}&}'}}|&|'f}|rt=        (fd||!fD              \  }}}!t        j>                  |"      r n$|s| jA                  |||||!|      \  }}}}}6 |r4|r2| jC                  |||||      \  }}}}| jE                  ||||      \  }}nE| j7                  ||| j"                  dz
        \  }}t        j                  |      | j"                  z  }|j                  |d|      }|||||	fS )Nr,   r   r   rA   r\   r   rB   r   rE  rS   )rv   r>   r   )rJ  )rR  c              3   *   K   | ]
  }|      y wr   r,   r\  r   rR  s     r.   r_  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      V" |,V   c              3   *   K   | ]
  }|      y wr   r,   r  s     r.   r_  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      l & #K<0lr  )#rj   r   r'   r   	new_zerosr   rL  rl   r/  r
  r&  rH  r3  r4  rk   	ones_liker9  r1  r#  get_extended_attention_maskrM  r:  r<  rP  rO   anyrD   rS  extendlistr+   allrl  rv  r~  ))r<   r   r   r  r   r    r   r>   r   r   rj   r   r   r  initial_num_keypointsnum_points_per_pairimage_indicesrG  do_early_stopdo_keypoint_pruningrs  r   r   rt  ru  pruned_keypoints_indicespruned_keypoints_iterationsr?  r   extended_attention_masklayer_outputr!   	attentionrI  rO  early_stopped_image_indicesearly_stopped_matchesearly_stopped_matching_scoreskeypoints_0
keypoint_1rR  s)                                           @r.   _match_image_pairz.LightGlueForKeypointMatching._match_image_pair  sg    #7BD0d??1"OOCR(E""5"EII">##E*##E*!  !!2;///
A,a#iiZ(D!L%%j1n6KQO	FJFVt||JN,AB\`!))*q.:OQUQvQvwZ!^FC'	65A	040I0I9M 1J 1
-- -Q/	 --1 #33a7 )+&,.)#(<<3HQW#X#_#_`jmn`npr#s &+oo6N&O# 1 R	K%**,K*.*J*J4Q\*]'*/**j+b/5R[d[k[k*l'?422;?6%9"3L 5A1K	#$5$E! !/)!;1!44+M4+@+@+Mk+Z( +/*M*M,k4L_ +N +'
 +0**Zuzz*R'9901 #6"G"G"JK2?2L/KOKfKf#T;K Lg LH)+H (..t4O/PQNN4(=#>?#**40M+NO*6==dC[\gCh>ij9@@FabmFnAop +>?R>R*S'PU V'2IaL)A,PTVc&dV QMKj$ "-j 9I*fk l !9 ; 4+l gc02MOc 9901" 33#!03,# dY(@$HcQR	h 0 22'25# h*,MwXg (,'F'F.%	($G_ (,'B'B;PTVZVeVehiVi'j$G_050PSWSbSb0b-,M,U,U0-
)
 -
 	
r-   r   labelsc           
      0   d }|t        d      ||n| j                  j                  }||n| j                  j                  }|j                  dk7  s|j                  d      dk7  rt        d      |j                  \  }}}}	}
|j                  |dz  ||	|
      }| j                  |      }|d d \  }}}}|j                  |ddd      j                  |      }|j                  |dd| j                        j                  |      }|j                  |dd      }|j                         }|d d d d d d df   |
z  |d d d d d d df<   |d d d d d d df   |	z  |d d d d d d df<   | j                  |||	|
|||	      \  }}}}}t        ||||||||
      S )Nz9LightGlue is not trainable, no labels should be provided.   r   r   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)   rA   r   )r    r   r>   )r   r   r   r   r   r    r!   r"   )
ValueErrorr1   r   r>   ndimr#  rj   rl   r-  r`   r/  cloner  r   )r<   r   r  r   r>   r   r   r  channelsr  r   keypoint_detectionsr   r   r    absolute_keypointsr   r   r   r!   r"   s                        r.   rL   z$LightGlueForKeypointMatching.forwardZ  s    XYY1B1N-TXT_T_TqTq$8$D $++JjJj 	 !\%6%6q%9Q%>noo1=1C1C.
Ax#++JNHfeT"44\B*=bq*A'	1k4%%j!R;>>|L	!))*aT=b=bcffgst||J2.&__.);Aq!QJ)G%)O1aA:&);Aq!QJ)G&)P1aA:&EIE[E[/!5 F\ F
B%
 /+'!	
 		
r-   rM   r   )NNN)r#   r$   r%   r&   r   r6   r   r^   rC  r'   rN   r   rO   r+   rH  rP  rS  rW  rl  rv  r~  r  r   r   r(   
LongTensorr   r   rL   rP   rQ   s   @r.   r(  r(    s    @(S (U ( jo? <<?49LL?X`aeXf?	u||U5<<#=>>	??#$)LL#?B#JO,,#didpdp#	#&(U\\ 5<< ^a fkfrfr #_\\#_ <<#_ ll	#_
 #_ ll#_ $ll#_ #_Jk8#*#* #* 	#*
 ||#* 
u||U\\)	*#*V ",0/3k
<<k
 \\k
 	k

 k
 llk
 $D>k
 'tnk
 
u||U\\5<<E	Fk
Z  .2,0/33
''3
 ))*3
 $D>	3

 'tn3
 
u55	63
  3
r-   r(  r   )r   )5dataclassesr   typingr   r   r   numpyrA  r'   r   torch.nn.utils.rnnr	   activationsr   modeling_flash_attention_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   auto.modeling_autor   configuration_lightgluer   r   Moduler0   rZ   rg   rN   r   rq   r^   r   r   r   r   r   r   r   r   r+   r  r&  r(  __all__r,   r-   r.   <module>r     sU  ( " , ,    + ! B F & D D - > 4  :k  :  :F "<<	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 U\\*% % % '(%4D) D)N299 "P>		 P>f-2\\JO,,
\\&BII &R	BII 	   $ELL $U $uU\\[`[g[gMgGh $@5<<  S U\\ , 
f
#; f

f
R &'E
Fr-   