
    rh:                         d dl Zd dlZd dlZd dlZd dlmZ d Zd ZddZ	ddZ
d Zd Z	 dd	Zd
 ZddZd Z G d d      Z G d de      Z G d d      Z G d d      Zy)    N)
ThreadPoolc                       j                   \  }}j                   ||fk(  sJ t         fdt        |      D              }| j                  z  S )z< computes the intersection measure of two result tables
    c              3   j   K   | ]*  }t        j                  |   |         j                   , y wN)npintersect1dsize).0iI1I2s     k/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/faiss/contrib/evaluation.py	<genexpr>z+knn_intersection_measure.<locals>.<genexpr>   s2       	r!ube$))s   03)shapesumranger	   )r   r   nqrankninters   ``   r   knn_intersection_measurer      sT     xxHB88Dz!!! r F BGG    c                     | j                   dz
  }||k  }t        j                  |       }t        |      D ]*  }||   || |   | |dz       j	                         z   ||dz   <   , |||   ||   fS )z select a set of results    )r	   r   
zeros_liker   r   )limsDIthreshr   masknew_limsr   s           r   filter_range_resultsr!      s    	QBv:D}}T"H2Y J"1+T!WtAE{(C(G(G(IIQJQtWag%%r   c                 8   	
  fd
fd j                   dz
  }j                   dz
  |k(  sJ t        j                  |d      		
fd}t        d      }|j	                  |t        |             t         dd  dd	 z
  dd dd	 z
  	|
      S )zucompute the precision and recall of range search results. The
    function does not take the distances into account. c                      |    | dz       S Nr    r   Ireflims_refs    r   ref_result_forz range_PR.<locals>.ref_result_for,       HQKQ00r   c                      |    | dz       S r$   r%   )r   Inewlims_news    r   new_result_forz range_PR.<locals>.new_result_for/   r*   r   r   int64dtypec                 n     |       } |       }t        j                  ||      }t        |      | <   y r   )r   r   len)qgt_idsnew_idsinterr.   r   r)   s       r   compute_PR_forz range_PR.<locals>.compute_PR_for7   s:      " !# vw/Jq	r      Nmode)r	   r   zerosr   mapr   counts_to_PR)r(   r'   r-   r,   r<   r   r8   poolr.   r   r)   s   ````    @@@r   range_PRrA   (   s    11 
	B==1"""XXb(F b>DHH^U2Y'x}$x}$	 r   c                    |dk(  r\| j                         |j                         |j                         }}} |dkD  r||z  }nd}| dkD  r	|| z  }||fS |dk(  rd}||fS d}||fS |dk(  r~| dk(  }d| |<   || z  }||   dk(  j                  t              ||<   |dk(  }t        j                  ||   dk(        sJ d||<   d||<   ||z  }|j                         |j                         fS t               )z computes a  precision-recall for a ser of queries.
    ngt = nb of GT results per query
    nres = nb of found results per query
    ninter = nb of correct results per query (smaller than nres of course)
    overallr         ?        averager   )r   astypefloatr   allmeanAssertionError)	ngtnresr   r<   	precisionrecallr   recalls
precisionss	            r   r?   r?   P   s/    yGGItxxz6::<6T!8II7c\F &   QYF &   F&  		 axD	3,dq007 qyvvfTla'(((tT
d]
 ',,.00 r   c                    t        j                  |      }t        j                  |      }t        |       dz
  }t        |      D ]9  }| |   | |dz      }}||| }	||| }
|
j	                         }|	|   ||| |
|   ||| ; ||fS )z& sort 2 arrays using the first as key r   )r   
empty_liker3   r   argsort)r   r   r   r   D2r   r   l0l1iidios               r   sort_range_res_2r[   ~   s    	q	B	q	B	TQB2Y a$q1u+Br"Xr"XJJLqE2b	qE2b	 r6Mr   c                     t        j                  |      }t        |       dz
  }t        |      D ]*  }| |   | |dz      }}||| ||| ||| j	                          , |S r$   )r   rS   r3   r   sort)r   r   r   r   r   rV   rW   s          r   sort_range_res_1r^      sm    	q	B	TQB2Y a$q1u+BbH2b	
2b	 Ir   c           	      F    d|v rt               d|v rt              \   fdfd j                  dz
  }j                  dz
  |k(  sJ t              }	t	        j
                  ||	dfd      fd	}
t        d
      }|j                  |
t        |             t	        j
                  |	      }t	        j
                  |	      }t        |	      D ]6  }t        dd|df   dd|df   dd|df   |      \  }}|||<   |||<   8 ||fS )z compute precision-recall values for range search results
    for several thresholds on the "new" results.
    This is to plot PR curves
    refnewc                      |    | dz       S r$   r%   r&   s    r   r)   z4range_PR_multiple_thresholds.<locals>.ref_result_for   r*   r   c                 2    |    | dz      }}|| || fS r$   r%   )r   rV   rW   Dnewr,   r-   s      r   r.   z4range_PR_multiple_thresholds.<locals>.new_result_for   s/    !hq1uoBBr{DBK''r   r      r/   r0   c                     	|       } |       \  }}t        |      | d d df<   |j                  dk(  ry t        j                  |
      }|| d d df<   |j                  dk(  ry t        j                  ||      }d||t        |      k(  <   t        j                  ||   |k(        }t        j
                  dg|f      }||   | d d df<   y )Nr   r   r:      )r3   r	   r   searchsortedcumsumhstack)r4   r5   res_idsres_disrM   rX   n_okcountsr.   r)   
thresholdss          r   r8   z4range_PR_multiple_thresholds.<locals>.compute_PR_for   s    ")!,f+q!Qw<<1 oogz2q!Qw;;! __VW- "2Vyyw./ yy1#t%t*q!Qwr   r9   Nr   rg   r;   )
r^   r[   r	   r3   r   r=   r   r>   r   r?   )r(   r'   r-   rd   r,   ro   r<   do_sortr   ntr8   r@   rQ   rP   tprrn   r.   r)   s   ``````           @@@r   range_PR_multiple_thresholdsru      s.    $/ %hd;
d1( 
	B==1"""	ZBXXr2qk1F%4 b>DHH^U2Y' "JhhrlG2Y q!Qw1a&Aq/
1 
1
 wr   c                 0   t        j                  | |g      }|j                          t        |      }t        j                  |      }|dd |dd z
  |dd |||kD     }t        j
                  || d      dz
  }t        j
                  ||d      dz
  }||fS )zt for two tables, cluster them by merging values closer than thr.
    Returns the cluster ids for each table element r   Nr:   right)side)r   rj   r]   r3   onesrh   )	tab1tab2thrtabndiffsunique_valsidx1idx2s	            r   _cluster_tables_with_tolerancer      s     ))T4L
!CHHJCAGGAJEAB#cr("E!"Ieck"K??;7;a?D??;7;a?D:r   c           
         t         j                  j                  | ||       t        j                         }t        t        |            D ]  }t        j                  ||   ||   k(        r"|| |   j                         z  }t        | |   ||   |      \  }}	t        j                  |      D ]>  }
|
|d   k(  r||
k(  }|j                  t        |||f         t        |||f                @  y)zS test that knn search results are identical, with possible ties.
    Raise if not. )rtolr:   N)r   testingassert_allcloseunittestTestCaser   r3   rI   maxr   uniqueassertEqualset)Drefr'   rd   r,   r   testcaser   rt   DrefCDnewCdisr   s               r   check_ref_knn_with_drawsr      s     JJtT5  "H3t9 I66$q'T!W$% 47;;= 5d1gtAwJu99U# 	ICeBiC<D  T!T']!3Sag5GH		IIr   c                    t         j                  j                  | |       t        |       dz
  }t	        |      D ]  }| |   | |dz      }	}|||	 }
|||	 }|||	 }|||	 }t        j
                  |
|k(        rn;d } ||
|      \  }
} |||      \  }}t         j                  j                  |
|       t         j                  j                  ||d        y)zM compare range search results wrt. a reference result,
    throw if it fails r   c                 6    | j                         }| |   ||   fS r   )rT   )r   r   rZ   s      r   sort_by_idsz,check_ref_range_results.<locals>.sort_by_ids  s    IIKtQqTz!r      )decimalN)r   r   assert_array_equalr3   r   rI   assert_array_almost_equal)Lrefr   r'   Lnewrd   r,   r   r   rV   rW   Ii_refIi_newDi_refDi_newr   s                  r   check_ref_range_resultsr   	  s     JJ!!$-	TQB2Y Ha$q1u+Bbbbb66&F"#"  +66:VV*66:VVJJ))&&9


,,VVQ,G!Hr   c                   :    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
y	)
OperatingPointszw
    Manages a set of search parameters with associated performance and time.
    Keeps the Pareto optimal points.
    c                      g | _         g | _        y r   )operating_pointssuboptimal_pointsselfs    r   __init__zOperatingPoints.__init__,  s    !
 "$r   c                     t         )z1 return -1 if k1 > k2, 1 if k2 > k1, 0 otherwise NotImplementedr   k1k2s      r   compare_keyszOperatingPoints.compare_keys3      r   c                     t         )zC parameters to say we do noting, takes 0 time and has 0 performancer   r   s    r   do_nothing_keyzOperatingPoints.do_nothing_key7  r   r   c                 H    | j                   D ]  \  }}}||k\  s||k  s y y)NFT)r   )r   perf_newt_new_perfrr   s         r   is_pareto_optimalz!OperatingPoints.is_pareto_optimal;  s3    // 	JAtQxAJ	 r   c                     d}d}| j                   | j                  z   D ]2  \  }}}| j                  ||      }|dkD  r||kD  r|}|dk  s+||k  s1|}4 ||fS )z, predicts the bound on time and performance rE   rD   r   )r   r   r   )r   keymin_timemax_perfkey2r   rr   cmps           r   predict_boundszOperatingPoints.predict_boundsA  sw    !22T5K5KK 	$MD$##C.CQwx< HQw(?#H	$ !!r   c                 N    | j                  |      \  }}| j                  ||      S r   )r   r   )r   r   r   r   s       r   should_run_experimentz%OperatingPoints.should_run_experimentO  s*    #22378%%h99r   c                    | j                  ||      rd}|t        | j                        k  rp| j                  |   \  }}}||k\  r:||k  r5| j                  j	                  | j                  j                  |             n|dz  }|t        | j                        k  rp| j                  j	                  |||f       y| j                  j	                  |||f       y)Nr   r   TF)r   r3   r   r   appendpop)r   r   r   rr   r   op_Lsperf2t2s           r   add_operating_pointz#OperatingPoints.add_operating_pointS  s    !!$*Ac$//00#'#8#8#; ub5=QV**11--11!46 FA c$//00 !!((#tQ8""))3a.9r   N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r%   r   r   r   r   &  s*    
$":r   r   c                   ^    e Zd ZdZd Zd Zd Zd Zd Ze	j                  fdZd Zd	 Zd
 Zy)OperatingPointsWithRangesz
    Set of parameters that are each picked from a discrete range of values.
    An increase of each parameter is assumed to make the operation slower
    and more accurate.
    A key = int array of indices in the ordered set of parameters.
    c                 <    t         j                  |        g | _        y r   )r   r   rangesr   s    r   r   z"OperatingPointsWithRanges.__init__m  s      &r   c                 >    | j                   j                  ||f       y r   )r   r   r   namevaluess      r   	add_rangez#OperatingPointsWithRanges.add_ranger  s    D&>*r   c                 h    t        j                  ||k\        ryt        j                  ||k\        ryy)Nr   r:   r   )r   rI   r   s      r   r   z&OperatingPointsWithRanges.compare_keysu  s+    66"(66"(r   c                 ^    t        j                  t        | j                        t              S )Nr0   )r   r=   r3   r   intr   s    r   r   z(OperatingPointsWithRanges.do_nothing_key|  s    xxDKK(44r   c                     t        t        j                  | j                  D cg c]  \  }}t	        |       c}}            S c c}}w r   )r   r   prodr   r3   r   s      r   num_experimentsz)OperatingPointsWithRanges.num_experiments  s0    277DKKHLD&CKHIJJHs   Ac                 @   |dk(  s|dk\  sJ | j                         }t        j                  j                  d      }|dk(  s||k  r|j	                  |dz
        }n|j                  |dz
  |dz
  d      }d|dz
  g|D cg c]  }t        |      dz    c}z   }|S c c}w )z} sample a set of experiments of max size n_autotune
        (run all experiments in random order if n_autotune is 0)
        r   rg   {   F)r	   replacer   )r   r   randomRandomStatepermutationchoicer   )r   
n_autotunerstotexexperimentscnos         r   sample_experimentsz,OperatingPointsWithRanges.sample_experiments  s     Q*/11$$&YY""3'?ej0..3K))	
Q $ ?K %!)n'LC1'LL (Ms   ?Bc                     t        j                  t        | j                        t              }t        | j                        D ]'  \  }\  }}|t        |      z  ||<   |t        |      z  }) |dk(  sJ |S )z/Convert a sequential experiment number to a keyr0   r   )r   r=   r3   r   r   	enumerate)r   r   kr   r   r   s         r   
cno_to_keyz$OperatingPointsWithRanges.cno_to_key  sn    HHS%S1!*4;;!7 	 A~fV$AaDCKC	  axxr   c           	      x    t        | j                        D ci c]  \  }\  }}||||       c}}}S c c}}}w )z3Convert a key to a dictionary with parameter values)r   r   )r   r   r   r   r   s        r   get_parametersz(OperatingPointsWithRanges.get_parameters  sG     &/t{{%;
 
!>D& &1,
 	
 
s   5c                     | j                   D ]&  \  }}||k(  s|D cg c]
  }||k  s	| }}||dd  y t        d| d      c c}w )z% remove too large values from a rangeNz
parameter z
 not found)r   RuntimeError)r   r   max_valname2r   vval2s          r   restrict_rangez(OperatingPointsWithRanges.restrict_range  s`    ![[ 	ME6u}#)9aQ[99 q			
 ZvZ899 :s
   
AAN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r%   r   r   r   r   e  s>    
+5K 13		  
:r   r   c                       e Zd Zd Zd Zy)	TimerIterc                     g | _         |j                  | _        || _        |j                  dk\  r t	        j
                  |j                         y y )Nr   )tsrunstimerrq   faissomp_set_num_threads)r   r   s     r   r   zTimerIter.__init__  s=    JJ	
88q=%%ehh/ r   c                    | j                   }| xj                  dz  c_        | j                  j                  t	        j                                t        | j                        dk\  r| j                  d   | j                  d   z
  nd}| j                  dk(  s||j                  kD  r|j                  dk\  rt        j                  |j                         t        j                  | j                        }|dd  |d d z
  }t        |      |j                  k(  r||j                  d  |_        t        |d d  |_        t        y )Nr   rg   r:   r   )r   r   r   r   timer3   max_secsrq   r   r   remember_ntr   arraywarmuptimesStopIteration)r   r   
total_timer   r  s        r   __next__zTimerIter.__next__  s    

		Q	tyy{#14TWW1BTWWR[4771:-
99?j5>>9xx1}))%*;*;<$''"BqrFRW$E5zUZZ'#ELLN3   $Ah :r   N)r   r   r   r   r  r%   r   r   r   r     s    0 r   r   c                   L    e Zd ZdZdddej
                  fdZd Zd Zd Z	d	 Z
y
)RepeatTimeru!  
    This is yet another timer object. It is adapted to Faiss by
    taking a number of openmp threads to set on input. It should be called
    in an explicit loop as:

    timer = RepeatTimer(warmup=1, nt=1, runs=6)

    for _ in timer:
        # perform operation

    print(f"time={timer.get_ms():.1f} ± {timer.get_ms_std():.1f} ms")

    the same timer can be re-used. In that case it is reset each time it
    enters a loop. It focuses on ms-scale times because for second scale
    it's usually less relevant to repeat the operation.
    r   r:   r   c                 |    ||k  sJ || _         || _        || _        || _        t	        j
                         | _        y r   )r  rq   r   r  r   omp_get_max_threadsr  )r   r  rq   r   r  s        r   r   zRepeatTimer.__init__  s;    }}	  446r   c                     t        |       S r   )r   r   s    r   __iter__zRepeatTimer.__iter__  s    r   c                 F    t        j                  | j                        dz  S )N  )r   rJ   r  r   s    r   mszRepeatTimer.ms  s    wwtzz"T))r   c                 z    t        | j                        dkD  r"t        j                  | j                        dz  S dS )Nr   r  rE   )r3   r  r   stdr   s    r   ms_stdzRepeatTimer.ms_std  s.    ,/

Oa,?rvvdjj!D(HSHr   c                 ,    t        | j                        S )zJ effective number of runs (may be lower than runs - warmup due to timeout))r3   r  r   s    r   nrunszRepeatTimer.nruns  s    4::r   N)r   r   r   r   r   infr   r  r  r  r  r%   r   r   r
  r
    s0       BQ 7*Ir   r
  )rC   )rC   zref,new)gh㈵>)numpyr   r   r   r   multiprocessing.poolr   r   r!   rA   r?   r[   r^   ru   r   r   r   r   r   r   r
  r%   r   r   <module>r     s        +
	&%P,\ %.	G\I,H:< <~D: D:T   2$ $r   