
    rh+                         d dl Z d dlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
mZ  e       rd dlmZ 	 	 dde j                  d	e j                  d
ededef
dZ G d de      Z G d de	      Z	 	 	 	 ddZy)    N   )center_to_corners_format)is_scipy_available   )HungarianMatcher	ImageLoss_set_aux_lossgeneralized_box_iou)linear_sum_assignmentinputstargets	num_boxesalphagammac                     | j                         }t        j                  j                  | |d      }||z  d|z
  d|z
  z  z   }|d|z
  |z  z  }|dk\  r||z  d|z
  d|z
  z  z   }	|	|z  }|j	                         |z  S )aR  
    Loss used in RetinaNet for dense detection: https://huggingface.co/papers/1708.02002.

    Args:
        inputs (`torch.FloatTensor` of arbitrary shape):
            The predictions for each example.
        targets (`torch.FloatTensor` with the same shape as `inputs`)
            A tensor storing the binary classification label for each element in the `inputs` (0 for the negative class
            and 1 for the positive class).
        num_boxes (`int`):
            The total number of boxes in the batch.
        alpha (`float`, *optional*, defaults to 0.25):
            Optional weighting factor in the range (0,1) to balance positive vs. negative examples.
        gamma (`int`, *optional*, defaults to 2):
            Exponent of the modulating factor (1 - p_t) to balance easy vs hard examples.

    Returns:
        Loss tensor
    none)	reductionr   r   )sigmoidnn
functional binary_cross_entropy_with_logitssum)
r   r   r   r   r   probce_lossp_tlossalpha_ts
             x/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/loss/loss_grounding_dino.pysigmoid_focal_lossr      s    4 >>Dmm<<VWX^<_G
.AHW5
5Cq3w5()Dz'/QY1w;$??~88:	!!    c                   :    e Zd Z ej                         d        Zy)GroundingDinoHungarianMatcherc           	         |d   j                   dd \  }}|d   j                  dd      j                         }|d   j                  dd      }|d   }t        j                  t        ||      D 	cg c]  \  }}	||	d       c}	}      }||j                  d	d
      z  }t        j                  |D 
cg c]  }
|
d   	 c}
      }d}d}d|z
  ||z  z  d|z
  dz   j                          z  }|d|z
  |z  z  |dz   j                          z  }||z
  |j                         z  }t        j                  ||d      }t        t        |      t        |             }| j                  |z  | j                  |z  z   | j                  |z  z   }|j                  ||d	      j!                         }|D 
cg c]  }
t#        |
d          }}
t%        |j'                  |d	            D cg c]  \  }}t)        ||          }}}|D cg c]O  \  }}t        j*                  |t        j,                        t        j*                  |t        j,                        fQ c}}S c c}	}w c c}
w c c}
w c c}}w c c}}w )a  
        Args:
            outputs (`dict`):
                A dictionary that contains at least these entries:
                * "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                * "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates.
                * "label_maps": Tuple of tensors of dim [num_classes, hidden_dim].
            targets (`list[dict]`):
                A list of targets (len(targets) = batch_size), where each target is a dict containing:
                * "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of
                  ground-truth
                 objects in the target) containing the class labels
                * "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates.

        Returns:
            `list[Tuple]`: A list of size `batch_size`, containing tuples of (index_i, index_j) where:
            - index_i is the indices of the selected predictions (in order)
            - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds: len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        logitsNr   r   r   
pred_boxes
label_mapsclass_labelsT)dimkeepdimboxes      ?       @g:0yE>)p)dtype)shapeflattenr   torchcatzipr   logtcdistr
   r   	bbox_cost
class_cost	giou_costviewcpulen	enumeratesplitr   	as_tensorint64)selfoutputsr   
batch_sizenum_queriesout_probout_bboxr&   	label_maptargetvtarget_bboxr   r   neg_cost_classpos_cost_classr9   r8   r:   cost_matrixsizesicindicesjs                            r   forwardz%GroundingDinoHungarianMatcher.forwardD   sf   , #*("3"9"9"1"=
K 8$,,Q2::<<(00A6\*
 YY[^_ikr[stFWiQW	&*@ Atu
*..R."FF
 iiW =7 => e)%8a(lT>Q=V=V=X<XY1x<E"9:4?T?T?V>VW$~5G
 KK+;	 ))A()KMefqMrss	 nny04??Z3OORVR`R`clRll!&&z;CGGI*12QQwZ22;D[EVEVW\^`Ea;bc41a(1.cckrscgcdfg%++6QVQ\Q\8]^ss7  u
 !>( 3css   1I
4I"/I'$I,AI2N)__name__
__module____qualname__r2   no_gradrT    r    r   r"   r"   C   s    U]]_8t 8tr    r"   c                   "    e Zd ZdZd Zd Zd Zy)GroundingDinoImageLossa  
    This class computes the losses for `GroundingDinoForObjectDetection`. The process happens in two steps: 1) we
    compute hungarian assignment between ground truth boxes and the outputs of the model 2) we supervise each pair of
    matched ground-truth / prediction (supervise class and box).

    Args:
        matcher (`GroundingDinoHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        focal_alpha (`float`):
            Alpha parameter in focal loss.
        losses (`list[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
    c                 l    t         j                  j                  |        || _        || _        || _        y N)r   Module__init__matcherfocal_alphalosses)rB   r`   ra   rb   s       r   r_   zGroundingDinoImageLoss.__init__   s*    
		4 &r    c                    |d   }t        j                  t        t        ||            D cg c]2  \  }\  }\  }}|dkD  r|d   |   t	        |d   |         z   n|d   |   4 c}}}}      }	t        j                  |d   d      }
| j                  |      }t        j                  ||j                  t         j                        }|
|	   j                  t         j                        ||<   |S c c}}}}w )z>
        Create one_hot based on the matching indices
        r$   r   r'   r&   )r)   )devicer/   )
r2   r3   r>   r4   r=   _get_source_permutation_idx
zeros_likerd   longto)rB   rC   r   rR   r$   rP   rI   _Jr'   r&   idxtarget_classes_onehots                r   _get_target_classes_one_hotz2GroundingDinoImageLoss._get_target_classes_one_hot   s     "yy ,5S'5J+K 'A'A NOQRU~&q)C0Ea0H,IIX^_mXnopXqq
 YYw|4!<
..w7 % 0 0UZU_U_ `%/%=%@%@%Lc"$$s   7C2c                 0   d|vrt        d      d|vrt        d      | j                  |||      }|d   }|d   }t        j                  ||      }t        j                  ||      }|j	                         }t        |||| j                  d      }d|i}	|	S )z
        Classification loss (Binary focal loss) targets dicts must contain the key "class_labels" containing a tensor
        of dim [nb_target_boxes]
        r$   z#No logits were found in the outputs	text_maskz&No text_mask were found in the outputsr   )r   r   r   r   r   loss_ce)KeyErrorrm   r2   masked_selectfloatr   ra   )
rB   rC   r   rR   r   rl   source_logitsro   rp   rb   s
             r   loss_labelsz"GroundingDinoImageLoss.loss_labels   s    
 7"@AAg%CDD $ @ @'SZ [)K(	 ++M9E % 3 34I9 U 5 ; ; =$ )""
 W%r    N)rU   rV   rW   __doc__r_   rm   ru   rY   r    r   r[   r[      s    %(r    r[   c           
         t        |j                  |j                  |j                        }g d}t	        ||j
                  |      }|j                  |       i }| |d<   ||d<   ||d<   ||d<   d }|j                  r"t        ||      }|D ]  }||d<   ||d<    ||d<    |||      |j                  rG|	|
||d	} |||      }|j                         D ci c]  \  }}|d
z   | }}}j                  |       d|j                  |j                  d|j                  r7j                         D ci c]  \  }}|d
z   | }}}j                  |       |j                  rii }t        |j                  dz
        D ];  }|j                  j                         D ci c]  \  }}|d| z   | c}}       = j                  |       t!        fdD              }||fS c c}}w c c}}w c c}}w )N)r9   r8   r:   )labelsr+   cardinality)r`   ra   rb   r$   r%   r&   ro   auxiliary_outputs)r$   r%   r&   ro   _encr-   )rp   	loss_bbox	loss_giour   ri   c              3   >   K   | ]  }|v s|   |   z    y wr]   rY   ).0k	loss_dictweight_dicts     r   	<genexpr>z6GroundingDinoForObjectDetectionLoss.<locals>.<genexpr>  s%     T1CSy|k!n,Ts   	)r"   r9   r8   r:   r[   ra   rh   auxiliary_lossr	   	two_stageitemsupdatebbox_loss_coefficientgiou_loss_coefficientrangedecoder_layersr   )r$   rx   rd   r%   configr&   ro   outputs_classoutputs_coordencoder_logitsencoder_pred_boxesr`   rb   	criterionoutputs_lossrz   
aux_outputencoder_outputs_lossencoder_loss_dictr   rJ   enc_weight_dictaux_weight_dictrP   r   r   r   s                            @@r   #GroundingDinoForObjectDetectionLossr      s@    ,$$0@0@FL\L\G 0F&&&I
 LLL#L!+L!+L )L)-G+ 	0J'1J|$&/J{#	0 ->(),/I$,$"	 
 &&:FC7H7N7N7PQtq!QZ]QQ*+ 1111K 5@5F5F5HITQ1v:q=II?+v,,q01 	UA""{?P?P?R#Stq!A!A3KN#ST	U?+TiTTD---) R J $Ts   G*7G0"G6)r,   r   )NNNN)r2   torch.nnr   image_transformsr   utilsr   loss_for_object_detectionr   r   r	   r
   scipy.optimizer   Tensorintrs   r   r"   r[   r   rY   r    r   <module>r      s      7 & f f 4 $"LL$"\\$" $" 	$"
 $"N:t$4 :tzFY Fb F.r    