
import warnings
from dataclasses import dataclass
from typing import Callable, Optional, Union

import numpy as np
import torch
from torch import nn
from torch.nn.utils.rnn import pad_sequence

from ...configuration_utils import PretrainedConfig
from ...image_utils import ImageInput, is_vision_available, to_numpy_array
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...processing_utils import Unpack
from ...utils import ModelOutput, TensorType, auto_docstring, is_matplotlib_available, logging
from ...utils.generic import can_return_tuple
from ..auto import CONFIG_MAPPING, AutoConfig
from ..auto.modeling_auto import AutoModelForKeypointDetection
from ..clip.modeling_clip import CLIPMLP
from ..cohere.modeling_cohere import apply_rotary_pos_emb
from ..llama.modeling_llama import LlamaAttention, eager_attention_forward
from ..superglue.image_processing_superglue import SuperGlueImageProcessor, validate_and_format_image_pairs
from ..superpoint import SuperPointConfig


if is_vision_available():
    from PIL import Image, ImageDraw

logger = logging.get_logger(__name__)


class LightGlueConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`LightGlueForKeypointMatching`]. It is used to
    instantiate a LightGlue model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the LightGlue
    [ETH-CVG/lightglue_superpoint](https://huggingface.co/ETH-CVG/lightglue_superpoint) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        keypoint_detector_config (`Union[AutoConfig, dict]`,  *optional*, defaults to `SuperPointConfig`):
            The config object or dictionary of the keypoint detector.
        descriptor_dim (`int`, *optional*, defaults to 256):
            The dimension of the descriptors.
        num_hidden_layers (`int`, *optional*, defaults to 9):
            The number of self and cross attention layers.
        num_attention_heads (`int`, *optional*, defaults to 4):
            The number of heads in the multi-head attention.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
            `num_attention_heads`.
        depth_confidence (`float`, *optional*, defaults to 0.95):
            The confidence threshold used to perform early stopping.
        width_confidence (`float`, *optional*, defaults to 0.99):
            The confidence threshold used to prune points.
        filter_threshold (`float`, *optional*, defaults to 0.1):
            The confidence threshold used to filter matches.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The activation function to be used in the hidden layers.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        attention_bias (`bool`, *optional*, defaults to `True`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        trust_remote_code (`bool`, *optional*, defaults to `False`):
            Whether to trust remote code when using other models than SuperPoint as keypoint detector.

    Examples:
        ```python
        >>> from transformers import LightGlueConfig, LightGlueForKeypointMatching

        >>> # Initializing a LightGlue style configuration
        >>> configuration = LightGlueConfig()

        >>> # Initializing a model from the LightGlue style configuration
        >>> model = LightGlueForKeypointMatching(configuration)

        >>> # Accessing the model configuration
        >>> configuration = model.config
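        >>> # Illustrative sketch: pairing LightGlue with an explicitly configured keypoint detector.
        >>> # (Assumes the default SuperPoint settings; any detector config object or dict can be passed.)
        >>> from transformers import SuperPointConfig

        >>> custom_configuration = LightGlueConfig(
        ...     keypoint_detector_config=SuperPointConfig(),
        ...     depth_confidence=0.9,
        ...     width_confidence=0.95,
        ... )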
        ```
    """

    model_type = "lightglue"
    sub_configs = {"keypoint_detector_config": AutoConfig}

    def __init__(
        self,
        keypoint_detector_config: "PretrainedConfig" = None,
        descriptor_dim: int = 256,
        num_hidden_layers: int = 9,
        num_attention_heads: int = 4,
        num_key_value_heads=None,
        depth_confidence: float = 0.95,
        width_confidence: float = 0.99,
        filter_threshold: float = 0.1,
        initializer_range: float = 0.02,
        hidden_act: str = "gelu",
        attention_dropout: float = 0.0,
        attention_bias: bool = True,
        trust_remote_code: bool = False,
        **kwargs,
    ):
        if descriptor_dim % num_attention_heads != 0:
            raise ValueError("descriptor_dim % num_heads is different from zero")

        self.descriptor_dim = descriptor_dim
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.depth_confidence = depth_confidence
        self.width_confidence = width_confidence
        self.filter_threshold = filter_threshold
        self.initializer_range = initializer_range
        self.trust_remote_code = trust_remote_code

        if isinstance(keypoint_detector_config, dict):
            keypoint_detector_config["model_type"] = keypoint_detector_config.get("model_type", "superpoint")
            if keypoint_detector_config["model_type"] not in CONFIG_MAPPING:
                keypoint_detector_config = AutoConfig.from_pretrained(
                    keypoint_detector_config["_name_or_path"], trust_remote_code=self.trust_remote_code
                )
            else:
                keypoint_detector_config = CONFIG_MAPPING[keypoint_detector_config["model_type"]](
                    **keypoint_detector_config, attn_implementation="eager"
                )
        if keypoint_detector_config is None:
            keypoint_detector_config = CONFIG_MAPPING["superpoint"](attn_implementation="eager")

        self.keypoint_detector_config = keypoint_detector_config
        self.hidden_size = descriptor_dim
        self.intermediate_size = descriptor_dim * 2
        self.hidden_act = hidden_act
        self.attention_dropout = attention_dropout
        self.attention_bias = attention_bias
        super().__init__(**kwargs)


@dataclass
@auto_docstring(
    custom_intro="""
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    """
)
class LightGlueKeypointMatchingOutput(ModelOutput):
    r"""
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    """

    loss: Optional[torch.FloatTensor] = None
    matches: Optional[torch.FloatTensor] = None
    matching_scores: Optional[torch.FloatTensor] = None
    keypoints: Optional[torch.FloatTensor] = None
    prune: Optional[torch.IntTensor] = None
    mask: Optional[torch.BoolTensor] = None
    hidden_states: Optional[tuple[torch.FloatTensor]] = None
    attentions: Optional[tuple[torch.FloatTensor]] = None


class LightGlueImageProcessor(SuperGlueImageProcessor):
    def post_process_keypoint_matching(
        self,
        outputs: LightGlueKeypointMatchingOutput,
        target_sizes: Union[TensorType, list[tuple]],
        threshold: float = 0.0,
    ) -> list[dict[str, torch.Tensor]]:
        return super().post_process_keypoint_matching(outputs, target_sizes, threshold)

    def visualize_keypoint_matching(
        self,
        images: ImageInput,
        keypoint_matching_output: list[dict[str, torch.Tensor]],
    ) -> list["Image.Image"]:
        """
        Plots the image pairs side by side with the detected keypoints as well as the matching between them.

        Args:
            images (`ImageInput`):
                Image pairs to plot. Same as `LightGlueImageProcessor.preprocess`. Expects either a list of 2
                images or a list of lists of 2 images, with pixel values ranging from 0 to 255.
            keypoint_matching_output (`List[Dict[str, torch.Tensor]]`):
                A post processed keypoint matching output.

        Returns:
            `List[PIL.Image.Image]`: A list of PIL images, each containing the image pairs side by side with the detected
            keypoints as well as the matching between them.
        """
        images = validate_and_format_image_pairs(images)
        images = [to_numpy_array(image) for image in images]
        image_pairs = [images[i : i + 2] for i in range(0, len(images), 2)]

        results = []
        for image_pair, pair_output in zip(image_pairs, keypoint_matching_output):
            height0, width0 = image_pair[0].shape[:2]
            height1, width1 = image_pair[1].shape[:2]
            # Place the two images of the pair side by side on a single canvas.
            plot_image = np.zeros((max(height0, height1), width0 + width1, 3), dtype=np.uint8)
            plot_image[:height0, :width0] = image_pair[0]
            plot_image[:height1, width0:] = image_pair[1]

            plot_image_pil = Image.fromarray(plot_image)
            draw = ImageDraw.Draw(plot_image_pil)

            keypoints0_x, keypoints0_y = pair_output["keypoints0"].unbind(1)
            keypoints1_x, keypoints1_y = pair_output["keypoints1"].unbind(1)
            for keypoint0_x, keypoint0_y, keypoint1_x, keypoint1_y, matching_score in zip(
                keypoints0_x, keypoints0_y, keypoints1_x, keypoints1_y, pair_output["matching_scores"]
            ):
                color = self._get_color(matching_score)
                draw.line(
                    (keypoint0_x, keypoint0_y, keypoint1_x + width0, keypoint1_y),
                    fill=color,
                    width=3,
                )
                draw.ellipse((keypoint0_x - 2, keypoint0_y - 2, keypoint0_x + 2, keypoint0_y + 2), fill="black")
                draw.ellipse(
                    (keypoint1_x + width0 - 2, keypoint1_y - 2, keypoint1_x + width0 + 2, keypoint1_y + 2),
                    fill="black",
                )
            results.append(plot_image_pil)

        return results

    def _get_color(self, score):
        """Maps a score to a color."""
        r = int(255 * (1 - score))
        g = int(255 * score)
        b = 0
        return r, g, b

    def plot_keypoint_matching(self, images: ImageInput, keypoint_matching_output: LightGlueKeypointMatchingOutput):
        """
        Plots the image pairs side by side with the detected keypoints as well as the matching between them. Requires
        matplotlib to be installed.

        .. deprecated::
            `plot_keypoint_matching` is deprecated and will be removed in a future version. Use `visualize_keypoint_matching` instead.

        Args:
            images (`ImageInput`):
                Image pairs to plot. Same as `LightGlueImageProcessor.preprocess`. Expects either a list of 2 images or
                a list of lists of 2 images, with pixel values ranging from 0 to 255.
            keypoint_matching_output ([`LightGlueKeypointMatchingOutput`]):
                Raw outputs of the model.
        """
        warnings.warn(
            "`plot_keypoint_matching` is deprecated and will be removed in transformers v. Use "
            "`visualize_keypoint_matching` instead.",
            FutureWarning,
        )
        if is_matplotlib_available():
            import matplotlib.pyplot as plt
        else:
            raise ImportError("Please install matplotlib to use `plot_keypoint_matching` method")

        images = validate_and_format_image_pairs(images)
        images = [to_numpy_array(image) for image in images]
        image_pairs = [images[i : i + 2] for i in range(0, len(images), 2)]

        for image_pair, pair_output in zip(image_pairs, keypoint_matching_output):
            height0, width0 = image_pair[0].shape[:2]
            height1, width1 = image_pair[1].shape[:2]
            plot_image = np.zeros((max(height0, height1), width0 + width1, 3))
            plot_image[:height0, :width0] = image_pair[0] / 255.0
            plot_image[:height1, width0:] = image_pair[1] / 255.0
            plt.imshow(plot_image)
            plt.axis("off")

            keypoints0_x, keypoints0_y = pair_output["keypoints0"].unbind(1)
            keypoints1_x, keypoints1_y = pair_output["keypoints1"].unbind(1)
            for keypoint0_x, keypoint0_y, keypoint1_x, keypoint1_y, matching_score in zip(
                keypoints0_x, keypoints0_y, keypoints1_x, keypoints1_y, pair_output["matching_scores"]
            ):
                plt.plot(
                    [keypoint0_x, keypoint1_x + width0],
                    [keypoint0_y, keypoint1_y],
                    color=plt.get_cmap("RdYlGn")(matching_score.item()),
                    alpha=0.9,
                    linewidth=0.5,
                )
                plt.scatter(keypoint0_x, keypoint0_y, c="black", s=2)
                plt.scatter(keypoint1_x + width0, keypoint1_y, c="black", s=2)
            plt.show()


class LightGluePositionalEncoder(nn.Module):
    def __init__(self, config: LightGlueConfig):
        super().__init__()
        self.projector = nn.Linear(2, config.descriptor_dim // config.num_attention_heads // 2, bias=False)

    def forward(
        self, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
    ) -> Union[tuple[torch.Tensor], tuple[torch.Tensor, torch.Tensor]]:
        projected_keypoints = self.projector(keypoints)
        embeddings = projected_keypoints.repeat_interleave(2, dim=-1)
        cosines = torch.cos(embeddings)
        sines = torch.sin(embeddings)
        embeddings = (cosines, sines)
        output = (embeddings, projected_keypoints) if output_hidden_states else (embeddings,)
        return output


class LightGlueAttention(LlamaAttention):
    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        query_states = self.q_proj(hidden_states).view(hidden_shape).transpose(1, 2)

        # Cross-attention reads keys/values from the descriptors of the other image of the pair.
        is_cross_attention = encoder_hidden_states is not None
        current_states = encoder_hidden_states if is_cross_attention else hidden_states
        current_attention_mask = encoder_attention_mask if is_cross_attention else attention_mask

        key_states = self.k_proj(current_states).view(hidden_shape).transpose(1, 2)
        value_states = self.v_proj(current_states).view(hidden_shape).transpose(1, 2)

        if position_embeddings is not None:
            cos, sin = position_embeddings
            query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            current_attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            **kwargs,
        )

        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)
        return attn_output, attn_weights


class LightGlueMLP(CLIPMLP):
    def __init__(self, config: LightGlueConfig):
        super().__init__(config)
        self.fc1 = nn.Linear(config.intermediate_size, config.intermediate_size)
        self.layer_norm = nn.LayerNorm(config.intermediate_size, elementwise_affine=True)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        hidden_states = self.fc1(hidden_states)
        hidden_states = self.layer_norm(hidden_states)
        hidden_states = self.activation_fn(hidden_states)
        hidden_states = self.fc2(hidden_states)
        return hidden_states


class LightGlueTransformerLayer(nn.Module):
    def __init__(self, config: LightGlueConfig, layer_idx: int):
        super().__init__()
        self.self_attention = LightGlueAttention(config, layer_idx)
        self.self_mlp = LightGlueMLP(config)
        self.cross_attention = LightGlueAttention(config, layer_idx)
        self.cross_mlp = LightGlueMLP(config)

    def forward(
        self,
        descriptors: torch.Tensor,
        keypoints: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        output_hidden_states: Optional[bool] = False,
        output_attentions: Optional[bool] = False,
    ) -> tuple[torch.Tensor, Optional[tuple[torch.Tensor]], Optional[tuple[torch.Tensor]]]:
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (descriptors,)

        batch_size, num_keypoints, descriptor_dim = descriptors.shape

        # Self-attention inside each image, with rotary encodings of the keypoint positions.
        attention_output, self_attentions = self.self_attention(
            descriptors,
            position_embeddings=keypoints,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
        )
        intermediate_states = torch.cat([descriptors, attention_output], dim=-1)
        output_states = self.self_mlp(intermediate_states)
        self_attention_descriptors = descriptors + output_states

        # Cross-attention between the two images of each pair: flipping along the pair axis
        # swaps the descriptors (and masks) of image 0 and image 1.
        encoder_hidden_states = (
            self_attention_descriptors.reshape(-1, 2, num_keypoints, descriptor_dim)
            .flip(1)
            .reshape(batch_size, num_keypoints, descriptor_dim)
        )
        encoder_attention_mask = (
            attention_mask.reshape(-1, 2, 1, 1, num_keypoints).flip(1).reshape(batch_size, 1, 1, num_keypoints)
            if attention_mask is not None
            else None
        )
        cross_attention_output, cross_attentions = self.cross_attention(
            self_attention_descriptors,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            output_attentions=output_attentions,
        )
        cross_intermediate_states = torch.cat([self_attention_descriptors, cross_attention_output], dim=-1)
        cross_output_states = self.cross_mlp(cross_intermediate_states)
        descriptors = self_attention_descriptors + cross_output_states

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (
                attention_output,
                intermediate_states,
                output_states,
                cross_attention_output,
                cross_intermediate_states,
                cross_output_states,
            )
        if output_attentions:
            all_attentions = all_attentions + (self_attentions, cross_attentions)

        return descriptors, all_hidden_states, all_attentions


def sigmoid_log_double_softmax(
    similarity: torch.Tensor, matchability0: torch.Tensor, matchability1: torch.Tensor
) -> torch.Tensor:
    """create the log assignment matrix from logits and similarity"""
    batch_size, num_keypoints_0, num_keypoints_1 = similarity.shape
    certainties = nn.functional.logsigmoid(matchability0) + nn.functional.logsigmoid(matchability1).transpose(1, 2)
    scores0 = nn.functional.log_softmax(similarity, 2)
    scores1 = nn.functional.log_softmax(similarity.transpose(-1, -2).contiguous(), 2).transpose(-1, -2)
    scores = similarity.new_full((batch_size, num_keypoints_0 + 1, num_keypoints_1 + 1), 0)
    scores[:, :num_keypoints_0, :num_keypoints_1] = scores0 + scores1 + certainties
    scores[:, :-1, -1] = nn.functional.logsigmoid(-matchability0.squeeze(-1))
    scores[:, -1, :-1] = nn.functional.logsigmoid(-matchability1.squeeze(-1))
    return scores


class LightGlueMatchAssignmentLayer(nn.Module):
    def __init__(self, config: LightGlueConfig):
        super().__init__()
        self.descriptor_dim = config.descriptor_dim
        self.final_projection = nn.Linear(self.descriptor_dim, self.descriptor_dim, bias=True)
        self.matchability = nn.Linear(self.descriptor_dim, 1, bias=True)

    def forward(self, descriptors: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        batch_size, num_keypoints, descriptor_dim = descriptors.shape
        # Project and scale the descriptors, then compare image 0 against image 1 of each pair.
        m_descriptors = self.final_projection(descriptors)
        m_descriptors = m_descriptors / self.descriptor_dim**0.25
        m_descriptors = m_descriptors.reshape(batch_size // 2, 2, num_keypoints, descriptor_dim)
        m_descriptors0 = m_descriptors[:, 0]
        m_descriptors1 = m_descriptors[:, 1]
        similarity = m_descriptors0 @ m_descriptors1.transpose(-1, -2)
        if mask is not None:
            mask = mask.reshape(batch_size // 2, 2, num_keypoints)
            mask0 = mask[:, 0].unsqueeze(-1)
            mask1 = mask[:, 1].unsqueeze(-1).transpose(-1, -2)
            mask = mask0 * mask1
            similarity = similarity.masked_fill(mask == 0, torch.finfo(similarity.dtype).min)

        matchability = self.matchability(descriptors)
        matchability = matchability.reshape(batch_size // 2, 2, num_keypoints, 1)
        matchability_0 = matchability[:, 0]
        matchability_1 = matchability[:, 1]

        scores = sigmoid_log_double_softmax(similarity, matchability_0, matchability_1)
        return scores

    def get_matchability(self, descriptors: torch.Tensor) -> torch.Tensor:
        """Get matchability of descriptors as a probability"""
        matchability = self.matchability(descriptors)
        matchability = nn.functional.sigmoid(matchability).squeeze(-1)
        return matchability


class LightGlueTokenConfidenceLayer(nn.Module):
    def __init__(self, config: LightGlueConfig):
        super().__init__()
        self.token = nn.Linear(config.descriptor_dim, 1)

    def forward(self, descriptors: torch.Tensor) -> torch.Tensor:
        token = self.token(descriptors.detach())
        token = nn.functional.sigmoid(token).squeeze(-1)
        return token


class LightGluePreTrainedModel(PreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config: LightGlueConfig
    base_model_prefix = "lightglue"
    main_input_name = "pixel_values"
    supports_gradient_checkpointing = False
    _supports_flash_attn = True
    _supports_sdpa = True


def get_matches_from_scores(scores: torch.Tensor, threshold: float) -> tuple[torch.Tensor, torch.Tensor]:
    """obtain matches from a score matrix [Bx M+1 x N+1]"""
    batch_size, _, _ = scores.shape
    # Exclude the dustbin row/column before taking the per-row and per-column maxima.
    max0 = scores[:, :-1, :-1].max(2)
    max1 = scores[:, :-1, :-1].max(1)
    matches0 = max0.indices
    matches1 = max1.indices

    # Mutual-nearest-neighbor check between the two images.
    indices0 = torch.arange(matches0.shape[1], device=matches0.device)[None]
    indices1 = torch.arange(matches1.shape[1], device=matches1.device)[None]
    mutual0 = indices0 == matches1.gather(1, matches0)
    mutual1 = indices1 == matches0.gather(1, matches1)

    max0 = max0.values.exp()
    zero = max0.new_tensor(0)
    matching_scores0 = torch.where(mutual0, max0, zero)
    matching_scores1 = torch.where(mutual1, matching_scores0.gather(1, matches1), zero)
    valid0 = mutual0 & (matching_scores0 > threshold)
    valid1 = mutual1 & valid0.gather(1, matches1)

    matches0 = torch.where(valid0, matches0, -1)
    matches1 = torch.where(valid1, matches1, -1)
    matches = torch.stack([matches0, matches1]).transpose(0, 1).reshape(batch_size * 2, -1)
    matching_scores = torch.stack([matching_scores0, matching_scores1]).transpose(0, 1).reshape(batch_size * 2, -1)

    return matches, matching_scores


def normalize_keypoints(keypoints: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """
    Normalize keypoints locations based on the image shape.

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    """
    size = torch.tensor([width, height], device=keypoints.device, dtype=keypoints.dtype)[None]
    shift = size / 2
    scale = size.max(-1).values / 2
    keypoints = (keypoints - shift[:, None, :]) / scale[:, None, None]
    return keypoints


@auto_docstring(
    custom_intro="""
    LightGlue model taking images as inputs and outputting the matching of them.
    """
)
class LightGlueForKeypointMatching(LightGluePreTrainedModel):
    r"""
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of:
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://arxiv.org/pdf/2306.13643.pdf
    """

    def __init__(self, config: LightGlueConfig):
        super().__init__(config)

        self.keypoint_detector = AutoModelForKeypointDetection.from_config(
            config.keypoint_detector_config, trust_remote_code=config.trust_remote_code
        )
        self.keypoint_detector_descriptor_dim = config.keypoint_detector_config.descriptor_decoder_dim
        self.descriptor_dim = config.descriptor_dim
        self.num_layers = config.num_hidden_layers
        self.filter_threshold = config.filter_threshold
        self.depth_confidence = config.depth_confidence
        self.width_confidence = config.width_confidence

        if self.descriptor_dim != self.keypoint_detector_descriptor_dim:
            self.input_projection = nn.Linear(self.keypoint_detector_descriptor_dim, self.descriptor_dim, bias=True)
        else:
            self.input_projection = nn.Identity()

        self.positional_encoder = LightGluePositionalEncoder(config)
        self.transformer_layers = nn.ModuleList(
            [LightGlueTransformerLayer(config, layer_idx=i) for i in range(config.num_hidden_layers)]
        )
        self.match_assignment_layers = nn.ModuleList(
            [LightGlueMatchAssignmentLayer(config) for _ in range(config.num_hidden_layers)]
        )
        self.token_confidence = nn.ModuleList(
            [LightGlueTokenConfidenceLayer(config) for _ in range(config.num_hidden_layers - 1)]
        )

        self.post_init()

    def _get_confidence_threshold(self, layer_index: int) -> float:
        """scaled confidence threshold for a given layer"""
        threshold = 0.8 + 0.1 * np.exp(-4.0 * layer_index / self.num_layers)
        return np.clip(threshold, 0, 1)

    def _keypoint_processing(
        self, descriptors: torch.Tensor, keypoints: torch.Tensor, output_hidden_states: Optional[bool] = False
    ) -> tuple[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]:
        descriptors = descriptors.detach().contiguous()
        projected_descriptors = self.input_projection(descriptors)
        keypoint_encoding_output = self.positional_encoder(keypoints, output_hidden_states=output_hidden_states)
        return projected_descriptors, keypoint_encoding_output

    def _get_early_stopped_image_pairs(
        self, keypoint_confidences: torch.Tensor, layer_index: int, mask: torch.Tensor, num_points: torch.Tensor
    ) -> torch.Tensor:
        """evaluate whether we should stop inference based on the confidence of the keypoints"""
        batch_size, _ = keypoint_confidences.shape
        if layer_index < self.num_layers - 1:
            # Padded keypoints are given full confidence so they do not affect the ratio.
            keypoint_confidences = keypoint_confidences.masked_fill(mask == 0, 1)
            keypoint_confidences = keypoint_confidences.reshape(batch_size // 2, -1)
            threshold = self._get_confidence_threshold(layer_index)
            ratio_confident = 1.0 - (keypoint_confidences < threshold).float().sum(dim=-1) / num_points
            early_stopped_pairs = ratio_confident > self.depth_confidence
        else:
            # After the last layer, every remaining pair stops.
            early_stopped_pairs = torch.ones(batch_size // 2, dtype=torch.bool)
        return early_stopped_pairs

    def _get_keypoint_matching(self, descriptors, mask, layer_index, early_stops=None):
        if early_stops is not None:
            descriptors = descriptors[early_stops]
            mask = mask[early_stops]
        scores = self.match_assignment_layers[layer_index](descriptors, mask)
        matches, matching_scores = get_matches_from_scores(scores, self.filter_threshold)
        return matches, matching_scores

    def _get_pruning_mask(self, confidences: torch.Tensor, scores: torch.Tensor, layer_index: int) -> torch.Tensor:
        """mask points which should be removed"""
        keep = scores > (1 - self.width_confidence)
        if confidences is not None:
            keep |= confidences <= self._get_confidence_threshold(layer_index)
        return keep

    def _do_layer_keypoint_pruning(
        self,
        descriptors: torch.Tensor,
        keypoints: tuple[torch.Tensor, torch.Tensor],
        mask: torch.Tensor,
        indices: torch.Tensor,
        prune_output: torch.Tensor,
        keypoint_confidences: torch.Tensor,
        layer_index: int,
    ):
        """
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        """
        batch_size, _, _ = descriptors.shape
        descriptors_matchability = self.match_assignment_layers[layer_index].get_matchability(descriptors)
        pruned_keypoints_mask = self._get_pruning_mask(keypoint_confidences, descriptors_matchability, layer_index)
        pruned_keypoints_mask = pruned_keypoints_mask.masked_fill(mask == 0, False)

        # Keep only the surviving keypoints of each image; lengths now differ across the batch.
        pruned_descriptors, pruned_keypoints_0, pruned_keypoints_1, pruned_mask, pruned_indices = (
            [tensor[i][pruned_keypoints_mask[i]] for i in range(batch_size)]
            for tensor in (descriptors, keypoints[0], keypoints[1], mask, indices)
        )

        # Record, per original keypoint, one more layer of survival.
        for i in range(batch_size):
            prune_output[i, pruned_indices[i]] += 1

        # Re-pad everything to a common length.
        pruned_descriptors, pruned_keypoints_0, pruned_keypoints_1, pruned_mask = (
            pad_sequence(pruned_tensor, batch_first=True)
            for pruned_tensor in (pruned_descriptors, pruned_keypoints_0, pruned_keypoints_1, pruned_mask)
        )
        pruned_keypoints = (pruned_keypoints_0, pruned_keypoints_1)
        pruned_indices = pad_sequence(pruned_indices, batch_first=True, padding_value=-1)

        return pruned_descriptors, pruned_keypoints, pruned_mask, pruned_indices, prune_output

    def _concat_early_stopped_outputs(
        self,
        early_stops_indices,
        matches,
        matching_scores,
        final_pruned_keypoints_indices,
        final_pruned_keypoints_iterations,
    ):
        early_stops_indices = torch.stack(early_stops_indices)
        matches, final_pruned_keypoints_indices = (
            pad_sequence(tensor, batch_first=True, padding_value=-1)
            for tensor in (matches, final_pruned_keypoints_indices)
        )
        matching_scores, final_pruned_keypoints_iterations = (
            pad_sequence(tensor, batch_first=True, padding_value=0)
            for tensor in (matching_scores, final_pruned_keypoints_iterations)
        )
        # Restore the original image order: entry j of the collected lists corresponds to
        # original image index early_stops_indices[j].
        restore_order = torch.argsort(early_stops_indices)
        matches, matching_scores, final_pruned_keypoints_indices, final_pruned_keypoints_iterations = (
            tensor[restore_order]
            for tensor in (matches, matching_scores, final_pruned_keypoints_indices, final_pruned_keypoints_iterations)
        )
        return matches, matching_scores, final_pruned_keypoints_indices, final_pruned_keypoints_iterations

    def _do_final_keypoint_pruning(
        self,
        indices: torch.Tensor,
        matches: torch.Tensor,
        matching_scores: torch.Tensor,
        num_keypoints: int,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        # Convert matches, which are expressed in pruned keypoint indices, back to the original
        # keypoint indexing of each image.
        batch_size, _ = indices.shape
        indices, matches, matching_scores = (
            tensor.reshape(batch_size // 2, 2, -1) for tensor in (indices, matches, matching_scores)
        )
        indices0, indices1 = indices[:, 0], indices[:, 1]
        matches0, matches1 = matches[:, 0], matches[:, 1]
        matching_scores0, matching_scores1 = matching_scores[:, 0], matching_scores[:, 1]

        _matches = torch.full((batch_size // 2, 2, num_keypoints), -1, device=matches.device, dtype=matches.dtype)
        _matching_scores = torch.zeros(
            (batch_size // 2, 2, num_keypoints), device=matching_scores.device, dtype=matching_scores.dtype
        )
        for i in range(batch_size // 2):
            valid0 = indices0[i] >= 0
            valid1 = indices1[i] >= 0
            _matches[i, 0, indices0[i][valid0]] = torch.where(
                matches0[i][valid0] == -1, -1, indices1[i].gather(0, matches0[i][valid0].clamp(min=0))
            )
            _matches[i, 1, indices1[i][valid1]] = torch.where(
                matches1[i][valid1] == -1, -1, indices0[i].gather(0, matches1[i][valid1].clamp(min=0))
            )
            _matching_scores[i, 0, indices0[i][valid0]] = matching_scores0[i][valid0]
            _matching_scores[i, 1, indices1[i][valid1]] = matching_scores1[i][valid1]
        return _matches, _matching_scores

    def _match_image_pair(
        self,
        keypoints: torch.Tensor,
        descriptors: torch.Tensor,
        height: int,
        width: int,
        mask: torch.Tensor = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, tuple, tuple]:
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None

        if keypoints.shape[2] == 0:  # no keypoints detected in the pair
            shape = keypoints.shape[:-1]
            return (
                keypoints.new_full(shape, -1, dtype=torch.int),
                keypoints.new_zeros(shape),
                keypoints.new_zeros(shape),
                all_hidden_states,
                all_attentions,
            )

        device = keypoints.device
        batch_size, _, initial_num_keypoints, _ = keypoints.shape
        num_points_per_pair = torch.sum(mask.reshape(batch_size, -1), dim=1)

        # Flatten the pair dimension: the two images of a pair become consecutive batch entries.
        keypoints = keypoints.reshape(batch_size * 2, initial_num_keypoints, 2)
        mask = mask.reshape(batch_size * 2, initial_num_keypoints) if mask is not None else None
        descriptors = descriptors.reshape(batch_size * 2, initial_num_keypoints, self.keypoint_detector_descriptor_dim)
        image_indices = torch.arange(batch_size * 2, device=device)
        keypoints = normalize_keypoints(keypoints, height, width)

        descriptors, keypoint_encoding_output = self._keypoint_processing(
            descriptors, keypoints, output_hidden_states=output_hidden_states
        )
        keypoints = keypoint_encoding_output[0]

        do_early_stop = self.depth_confidence > 0
        do_keypoint_pruning = self.width_confidence > 0

        early_stops_indices = []
        matches = []
        matching_scores = []
        final_pruned_keypoints_indices = []
        final_pruned_keypoints_iterations = []

        pruned_keypoints_indices = torch.arange(0, initial_num_keypoints, device=device).expand(batch_size * 2, -1)
        pruned_keypoints_iterations = torch.ones_like(pruned_keypoints_indices)

        for layer_index in range(self.num_layers):
            num_keypoints = descriptors.shape[1]
            if mask is not None:
                extended_attention_mask = self.get_extended_attention_mask(mask, mask.shape)
            else:
                extended_attention_mask = None

            descriptors, hidden_states, attention = self.transformer_layers[layer_index](
                descriptors,
                keypoints,
                attention_mask=extended_attention_mask,
                output_hidden_states=output_hidden_states,
                output_attentions=output_attentions,
            )

            if output_hidden_states:
                all_hidden_states = all_hidden_states + hidden_states
            if output_attentions:
                all_attentions = all_attentions + attention

            if do_early_stop:
                if layer_index < self.num_layers - 1:
                    keypoint_confidences = self.token_confidence[layer_index](descriptors)
                    early_stopped_pairs = self._get_early_stopped_image_pairs(
                        keypoint_confidences, layer_index, mask, num_points=num_points_per_pair
                    )
                else:
                    keypoint_confidences = None
                    early_stopped_pairs = torch.ones(descriptors.shape[0] // 2, dtype=torch.bool)

                if torch.any(early_stopped_pairs):
                    # Both images of an early stopped pair leave the batch together.
                    early_stops = early_stopped_pairs.repeat_interleave(2)
                    early_stopped_image_indices = image_indices[early_stops]
                    early_stopped_matches, early_stopped_matching_scores = self._get_keypoint_matching(
                        descriptors, mask, layer_index, early_stops=early_stops
                    )
                    early_stops_indices.extend(list(early_stopped_image_indices))
                    matches.extend(list(early_stopped_matches))
                    matching_scores.extend(list(early_stopped_matching_scores))
                    if do_keypoint_pruning:
                        final_pruned_keypoints_indices.extend(list(pruned_keypoints_indices[early_stops]))
                        final_pruned_keypoints_iterations.extend(list(pruned_keypoints_iterations[early_stops]))

                    # Keep only the pairs that are not done yet.
                    keep = ~early_stops
                    image_indices = image_indices[keep]
                    descriptors = descriptors[keep]
                    keypoints = (keypoints[0][keep], keypoints[1][keep])
                    mask = mask[keep] if mask is not None else None
                    num_points_per_pair = num_points_per_pair[~early_stopped_pairs]
                    if do_keypoint_pruning:
                        pruned_keypoints_indices = pruned_keypoints_indices[keep]
                        pruned_keypoints_iterations = pruned_keypoints_iterations[keep]
                        if keypoint_confidences is not None:
                            keypoint_confidences = keypoint_confidences[keep]

                if torch.all(early_stopped_pairs):
                    break

            if do_keypoint_pruning:
                (
                    descriptors,
                    keypoints,
                    mask,
                    pruned_keypoints_indices,
                    pruned_keypoints_iterations,
                ) = self._do_layer_keypoint_pruning(
                    descriptors,
                    keypoints,
                    mask,
                    pruned_keypoints_indices,
                    pruned_keypoints_iterations,
                    keypoint_confidences,
                    layer_index,
                )

        if do_early_stop and do_keypoint_pruning:
            (
                matches,
                matching_scores,
                final_pruned_keypoints_indices,
                final_pruned_keypoints_iterations,
            ) = self._concat_early_stopped_outputs(
                early_stops_indices,
                matches,
                matching_scores,
                final_pruned_keypoints_indices,
                final_pruned_keypoints_iterations,
            )
            matches, matching_scores = self._do_final_keypoint_pruning(
                final_pruned_keypoints_indices,
                matches,
                matching_scores,
                initial_num_keypoints,
            )
            prune = final_pruned_keypoints_iterations
        else:
            matches, matching_scores = self._get_keypoint_matching(descriptors, mask, self.num_layers - 1)
            prune = torch.ones_like(matching_scores) * self.num_layers

        matches = matches.reshape(batch_size, 2, -1)
        matching_scores = matching_scores.reshape(batch_size, 2, -1)
        prune = prune.reshape(batch_size, 2, -1)

        return matches, matching_scores, prune, all_hidden_states, all_attentions

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.FloatTensor,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> Union[tuple, LightGlueKeypointMatchingOutput]:
        loss = None
        if labels is not None:
            raise ValueError("LightGlue is not trainable, no labels should be provided.")

        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        if pixel_values.ndim != 5 or pixel_values.size(1) != 2:
            raise ValueError("Input must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)")

        batch_size, _, channels, height, width = pixel_values.shape
        pixel_values = pixel_values.reshape(batch_size * 2, channels, height, width)
        keypoint_detections = self.keypoint_detector(pixel_values)

        keypoints, _, descriptors, mask = keypoint_detections[:4]
        keypoints = keypoints.reshape(batch_size, 2, -1, 2).to(pixel_values)
        descriptors = descriptors.reshape(batch_size, 2, -1, self.keypoint_detector_descriptor_dim).to(pixel_values)
        mask = mask.reshape(batch_size, 2, -1)

        # The detector returns keypoints normalized to [0, 1]; convert them to pixel coordinates.
        absolute_keypoints = keypoints.clone()
        absolute_keypoints[:, :, :, 0] = absolute_keypoints[:, :, :, 0] * width
        absolute_keypoints[:, :, :, 1] = absolute_keypoints[:, :, :, 1] * height

        matches, matching_scores, prune, hidden_states, attentions = self._match_image_pair(
            absolute_keypoints,
            descriptors,
            height,
            width,
            mask=mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        return LightGlueKeypointMatchingOutput(
            loss=loss,
            matches=matches,
            matching_scores=matching_scores,
            keypoints=absolute_keypoints,
            prune=prune,
            mask=mask,
            hidden_states=hidden_states,
            attentions=attentions,
        )


__all__ = ["LightGlueConfig", "LightGlueImageProcessor", "LightGluePreTrainedModel", "LightGlueForKeypointMatching"]
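# Illustrative end-to-end sketch of the matching workflow implemented above. It assumes the
# `ETH-CVG/lightglue_superpoint` checkpoint referenced in `LightGlueConfig`, two locally available
# photos of the same scene (placeholder paths), and that the checkpoint resolves through the Auto
# image processor; adjust names and paths to your setup.
if __name__ == "__main__":
    from PIL import Image as PILImage

    from transformers import AutoImageProcessor, LightGlueForKeypointMatching

    # Any two views of the same scene; the paths below are placeholders.
    image_pair = [PILImage.open("image0.jpg"), PILImage.open("image1.jpg")]

    processor = AutoImageProcessor.from_pretrained("ETH-CVG/lightglue_superpoint")
    model = LightGlueForKeypointMatching.from_pretrained("ETH-CVG/lightglue_superpoint")

    inputs = processor(image_pair, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Map the padded, pruned raw outputs back to per-pair matches in original image coordinates;
    # `post_process_keypoint_matching` needs the original (height, width) of each image.
    target_sizes = [[(image.height, image.width) for image in image_pair]]
    matches = processor.post_process_keypoint_matching(outputs, target_sizes, threshold=0.2)
    print(matches[0]["keypoints0"].shape, matches[0]["matching_scores"].shape)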