
    rhD                         d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlZd dlm	Z	 d dl
Z
d dlZd dlZd dlZd dlmZ  G d d      Z G d d      Zd	ej&                  ej(                  d d d
dddddd ddfdZy)    N)
ThreadPool)Queue)get_invlistc                   d    e Zd ZdZ	 	 ddZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zy)BigBatchSearcherz
    Object that manages all the data related to the computation
    except the actual within-bucket matching and the organization of the
    computation (parallel or not)
    c                 *   || _         g | _        || _        || _        || _        t        j                  |j                        }t        j                  t        |      ||      | _
        dgdz  | _        t        j                         x| _        | _        y )N)keep_maxr      )verbosetictocxqindexuse_float16faissis_similarity_metricmetric_type
ResultHeaplenrht_accutime	t_displayt0)selfr   r   kr   r   r	   s          q/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/faiss/contrib/big_batch_search.py__init__zBigBatchSearcher.__init__   sy     
&--e.?.?@""3r7AAcAg#'99;.    c                 6    t        j                          | _        y N)r   	t_accu_t0r   s    r   start_t_accuzBigBatchSearcher.start_t_accu0   s    r   c                 p    | j                   |xx   t        j                         | j                  z
  z  cc<   y r    )r   r   r!   )r   ns     r   stop_t_accuzBigBatchSearcher.stop_t_accu3   s#    A$))+66r   c                 v    |t        j                          f| _        | j                  dkD  rt        |dd       y y )Nr   Tendflush)r   r   r   print)r   names     r   ticzBigBatchSearcher.tic6   s1    TYY[)<<!$D- r   c                     | j                   \  }}t        j                         |z
  }| j                  dkD  rt        | d|dd       |S )Nr   z: .3fz s)r   r   r   r,   )r   r-   r   dts       r   toczBigBatchSearcher.toc;   sG    ;;bYY[2<<!TF"RHB'(	r   c                    | j                   dk(  s8| j                   dk(  r*|dkD  r%t        j                         | j                  dz   k  ry t        j                         | j                  z
  }t	        d|dd| d| j
                  j                   d	| j                  d
   dd| j                  d   dd| j                  d   dd| j                  d   dd| j                  d   dd| j                  d   ddt        j                  || j
                  j                  z  |dz   z  |z
         dt        j                          | j                   dk  rdndd       t        j                         | _        y )N      i  g      ?[z.1fz	 s] list /z times prep q r   r0   z prep b z comp z res    z	 wait in    z
 wait out    z eta )secondsz mem r(   
Tr)   )r   r   r   r   r,   r   nlistr   datetime	timedeltar   get_mem_usage_kb)r   lts      r   reportzBigBatchSearcher.reportB   sa   <<1LLADTYY[4>>C+?? IIK$''!#wis!DJJ$4$4#5 6 KKN3/xAs7K LKKN3'uT[[^C,@ A{{1~c* +As+ ,%%a

0@0@.@!A#.Fq.HIJ K))+,. *
	
 r   c                    | j                  d       d}t        | j                        }t        j                  || j
                  j                  fd      }t        d||      D ]`  }t        |||z         }| j
                  j                  j                  | j                  || | j
                  j                        \  }}|||| b | j                          || _        y )Nzcoarse quantizationi   int32)dtyper   )r.   r   r   npemptyr   nproberangemin	quantizersearchr2   q_assign)r   bsnqrN   i0i1q_dis_i
q_assign_is           r   coarse_quantizationz$BigBatchSearcher.coarse_quantizationW   s    &'\88R!2!237C2r" 	)BRb!B"&**"6"6"="=2

 1 1#3GZ )HRO	) 	
 r   c                    | j                  d       | j                  }|dz  }t        j                  | j                  | j                  j
                  dz   d      | _        | j                  j                         | _        | j                  dkD  rt        d| j                  d          | j                  dd  | _        | `| j                          y )Nzbucket sortr4      )nbucketntr   z  number of -1s:)r.   rN   r   matrix_bucket_sort_inplacer   r=   bucket_limsravel	query_idsr   r,   r2   )r   rN   s     r   reorder_assignzBigBatchSearcher.reorder_assigne   s    ==A ;;MM4::#3#3a#7B@,,.<<!$d&6&6q&9:++AB/M
r   c                    t        j                          }| j                  }| j                  |   | j                  |dz      }}| j                  || }| j                  |   }| j
                  r||j                  j                  |      z
  }t        j                          }t        |j                  |      \  }	}
| j                  |
j                         }
n| j                  |
      }
| j                  r"|
j                  d      }
|j                  d      }t        j                          }| j                  dxx   ||z
  z  cc<   | j                  dxx   ||z
  z  cc<   |||	|
fS )z4 prepare the queries and database items for bucket lr4   float16r   )r   r   r[   r]   r   by_residualrL   reconstructr   invlistsdecode_funcr\   r   astyper   )r   rA   r   r   rQ   rR   q_subsetxq_lt1list_idsxb_lt2s               r   prepare_bucketzBigBatchSearcher.prepare_bucketr   s*   YY[

!!!$d&6&6q1u&=B>>"R(wwx %//55a88DYY[$U^^Q7$#::<D##D)D;;y)D;;y)DYY[A"r'!A"r'!x--r   c                     |yt        j                          }||}n||   }| j                  j                  |||       | j                  dxx   t        j                          |z
  z  cc<   y)z,add the bucket results to the heap structureNr8   )r   r   add_result_subsetr   )r   rf   Dri   Ir   s         r   add_results_to_heapz$BigBatchSearcher.add_results_to_heap   s[    9YY[9AA!!(Aq1A$))+**r   c                     | j                   j                  | j                  j                  | j                  j                  fS r    )r   shaper   rI   r=   r"   s    r   sizes_in_checkpointz$BigBatchSearcher.sizes_in_checkpoint   s+    tzz00$**2B2BCCr   c                 "   |dz   }t        |d      5 }t        j                  | j                         || j                  j
                  | j                  j                  fd|d       d d d        t        j                  ||       y # 1 sw Y    xY w)Nz.tmpwb)sizes	completedr   )	openpickledumprt   r   ro   rp   osreplace)r   fnamerx   tmpnamefs        r   write_checkpointz!BigBatchSearcher.write_checkpoint   s{    &.'4  	AKK!557!*7799dggii0 b	 	

7E"	 	s   ABBc                 $   t        |d      5 }t        j                  |      }d d d        d   | j                         k(  sJ |d   d   | j                  j
                  d d  |d   d   | j                  j                  d d  |d   S # 1 sw Y   dxY w)Nrbrw   r   r   r4   rx   )rz   r{   loadrt   r   ro   rp   )r   r   r   ckps       r   read_checkpointz BigBatchSearcher.read_checkpoint   s    % 	!!++a.C	!7|t7799994y|		!4y|		!;	! 	!s   BBN)r   F)__name__
__module____qualname____doc__r   r#   r&   r.   r2   rC   rU   r^   rl   rq   rt   r   r    r   r   r   r      sQ     	/$%7.
%*!.8
+D
# r   r   c                   L    e Zd ZdZdej
                  ej                  fdZd Zy)BlockComputerz computation within one bucket knn_functionc                    || _         |j                  t        j                  k(  r1t        j                  |j
                  |j                        }d }d}n\|j                  t        j                  k(  rt        j                  |j
                  |j                  j                  |j                  j                  |j                        }|j                  |_	        |j                  j                  }d|_        |j                  }n|j                  t        j                  k(  rzt        j                   |j
                  |j"                  j$                  |j                        }|j"                  |_        |j"                  j                  }d|_        |j                  }nt'        d|j                   d      || _        |dk(  rd n|| _        || _        || _        || _        || _        y )Nc                 $    | j                  d      S )Nfloat32)view)xs    r   <lambda>z(BlockComputer.__init__.<locals>.<lambda>   s    AFF9$5 r   FTzindex type z not supportedr   )r   	__class__r   IndexIVFFlat	IndexFlatdr   
IndexIVFPQIndexPQpqMnbitsdecode
is_trainedra   IndexIVFScalarQuantizerIndexScalarQuantizersqqtypeRuntimeError
index_helprd   methodpairwise_distancesknn)r   r   r   r   r   r   rd   ra   s           r   r   zBlockComputer.__init__   si    
??e000%2C2CDJ5KK__ 0 00UXX^^U5F5FHJ!HHJM$--..K$(J!++K__ = ==33):):<J!HHJM$--..K$(J!++KU__,=^LMM$#)W#44+&"4r   c                    | j                   j                  }|j                  dk(  s|j                  dk(  rd x}}||fS | j                  dk(  rgt	        j
                  || j                  j                         t        |      | j                  _	        | j                  j                  ||      \  }}||fS | j                  dk(  r| j                  |||      }d }||fS | j                  dk(  r | j                  |||fd|i|\  }}fS )Nr   r   r   )metricr   r   )r   r   sizer   r   copy_array_to_vectorr   codesr   ntotalrM   r   r   )	r   rg   rj   ri   r   
extra_argsr   ro   rp   s	            r   block_searchzBlockComputer.block_search   s   jj,,99>TYY!^LA !t [[G#&&tT__-B-BC%(]DOO"??))$2DAq !t [[00''d;'GAA !t [[N*488D$L+LLDAq!tr   N)	r   r   r   r   r   r   r   r   r   r   r   r   r   r      s%    )
 "$77		!Fr   r   r   Fr4   i   ry   c                    
123  j                   }|dv sJ |j                  }t        |      |z  t        j                  d      j
                  z  }t        |      z  t        j                  d      j
                  t        j                  d      j
                  z   z  }||z   |z   }|dkD  r)t        j                  d| d| d| d	| d
|dz  dd       t         |||      1t         |||      22j                  1_
        2j                  1_        |1j                          n|1_        1j                          | j                  }t!               }|||fd j                  fk(  sJ t"        j$                  j'                  |      rKt        j                  d|        1j)                  |      }t        j                  dt        |              nt        j                  d       |dk(  rt+        ||      D ]  }1j-                  |       1j/                  |      \  }}}}t1        j0                         }2j3                  |||      \  }}1j4                  dxx   t1        j0                         |z
  z  cc<   1j7                  ||||        n|dk(  r1 fd}1j/                  |      } d}!t9        d      }"t+        ||      D ]  }1j-                  |       |"j;                  ||!|dz   f      }#| \  }}}}1j=                          2j3                  |||      \  }}1j?                  d       ||||f}!1j=                          |#jA                         } 1j?                  d         1j6                  |!  |"jC                          nd 33fd}$d"1fd	}%2
fd}&tE        d      }'tE        d      }( |$|&
d
t!               |(|'      }) |$|%|	||||'d      }*t1        j0                         }+	 t        j                  d       |(jA                         },|,sn|,\  }-}.}/}0}}}}|-|k(  rddz   1j4                  dxx   |0z  cc<   1j4                  dxx   |.z  cc<   1j4                  dxx   |/z  cc<   t        j                  d|-        1j7                  ||||       t        j                  d|-        |jG                  |-       1j-                  |-       |Ut1        j0                         |+z
  |kD  r;t        j                  d        1jI                  ||       t1        j0                         }+B|*jK                          |)jK                          1jM                  d!       1jN                  jQ                          1jS                          1jN                  jT                  1jN                  jV                  fS )#a  
    Search queries xq in the IVF index, with a search function that collects
    batches of query vectors per inverted list. This can be faster than the
    regular search indexes.
    Supports IVFFlat, IVFPQ and IVFScalarQuantizer.

    Supports three computation methods:
    method = "index":
        build a flat index and populate it separately for each index
    method = "pairwise_distances":
        decompress codes and compute all pairwise distances for the queries
        and index and add result to heap
    method = "knn_function":
        decompress codes and compute knn results for the queries

    threaded=0: sequential execution
    threaded=1: prefetch next bucket while computing the current one
    threaded=2: prefetch prefetch_threads buckets at a time.

    compute_threads>1: the knn function will get an additional thread_no that
        tells which worker should handle this.

    In threaded mode, the computation is tiled with the bucket perparation and
    the writeback of results (useful to maximize GPU utilization).

    use_float16: convert all matrices to float16 (faster for GPU gemm)

    q_assign: override coarse assignment, should be a matrix of size nq * nprobe

    checkpointing (only for threaded > 1):
    checkpoint: file where the checkpoints are stored
    checkpoint_freq: when to perform checkpoinging. Should be a multiple of threaded

    start_list, end_list: process only a subset of invlists
    )r   r   r   rE   int64r   r   zmemory: queries z assign z result z total z = i   @r0   z GiB)r   r   )r   r   r   Nzrecovering checkpoint: z   already completed: z$no checkpoint: starting from scratchr5   r4   c                 h    |  j                   |   |j                  k  rj                  |      S y)z` perform the addition for the previous bucket and
            prefetch the next (if applicable) N)rq   r=   rl   )to_addrA   bbsr   s     r   add_results_and_prefetchz2big_batch_search.<locals>.add_results_and_prefetcha  s<     !'''05;;))!,, r   r9   c           
         	 t        |      5 }t        ||      D cg c]  }||vr|j                  | |||f       }	}|	D ]  }
|
j                           |j	                          |j                          d d d        |j                  d        y c c}w # 1 sw Y    xY w#  t        j                          t        j                           xY w)N)args)r   rJ   apply_asyncgetclosejoinput	traceback	print_exc_threadinterrupt_main)task	pool_size
start_taskend_taskrx   output_queueinput_queuepooliresrs              r   task_manager_threadz-big_batch_search.<locals>.task_manager_thread~  s    	* 	 d "'z8!</ I-	  ++{; , = /C /
 !   JJLIIK	    &/	  	 ##%&&(s2   B! B!B9B6B! BBB! !+Cc                  d    t        j                  |       }d|_        |j                          |S )N)targetr   T)	threadingThreaddaemonstart)r   task_managerr   s     r   r   z&big_batch_search.<locals>.task_manager  s5    $++*L #'L r   c                    	 t        j                  d|         j                  |       \  }}}}|j                  | ||||f       t        j                  d|         y #  t	        j
                          t        j                           xY w)NzPrepare start: zPrepare end: )logginginforl   r   r   r   r   r   )task_idr   r   rf   rg   ri   rj   r   s          r   prepare_taskz&big_batch_search.<locals>.prepare_task  s    wi89141C1CG1L.$$  '8T8T!JK}WI67##%&&(s   AA   +Bc           
      b   	 t        j                  d|         d}	 t        j                         }t        j                  d|         |j                         }t        j                         |z
  }||j	                  d        n|\  }}}	}
}t        j                  d|  d|        t        j                         }dkD  rj                  |	||
|       \  }}nj                  |	||
      \  }}t        j                         |z
  }t        j                  d|  d|        t        j                         }|j	                  |||||||
|f       t        j                         |z
  }Lt        j                  d	|         y #  t        j                          t        j                           xY w)
NzCompute start: r   zCompute input: task zCompute work: task z, centroid r4   )	thread_idzCompute output: task zCompute end: )
r   r   r   r   r   r   r   r   r   r   )r   r   r   
t_wait_outr   input_value	t_wait_incentroidrf   rg   ri   rj   ro   rp   	t_computecompcomputation_threadsr   s                  r   compute_taskz&big_batch_search.<locals>.compute_task  s    wi89
BLL#7y!AB"-//"3K $		b 0I"*#-?J<HhhLL#6wi{8*!UVB*Q.#00 $!w  1  1  $00tXqI1 $		b 0ILL#8	XJ!WXB $$!9j)XqRZ\]^ "&r!1J1 2 }WI67##%&&(s   E?F +F.zWaiting for resultr:   zAdding to heap start: centroid zAdding to heap end: centroid zwriting checkpointzfinalize heapr    ),rI   nbytesr   rG   rF   itemsizer   r   r   r   rd   ra   rU   rN   r^   r=   setr}   pathexistsr   rJ   rC   rl   r   r   r   rq   r   r   r#   r&   r   r   r   addr   r   r.   r   finalizer2   ro   rp   )4r   r   r   r   r   r   r   threadedr   prefetch_threadsr   rN   
checkpointcheckpoint_freq
start_listend_listcrash_atrI   mem_queries
mem_assignmem_resmem_totrx   rA   rf   rg   ri   rj   t0iro   rp   r   prefetched_bucketr   r   prefetched_bucket_ar   r   r   prepare_to_compute_queuecompute_to_main_queuecompute_task_managerprepare_task_managert_checkpointvaluer   r   r   r   r   r   r   s4   ` `       `                                      @@@r   big_batch_searchr      sd   h \\FDDDD))KR6!BHHW$5$>$>>J"gk
""
((9

&
&	'G J&0G{{m8J< @YggYc'U2CC1HN	

 r1C -	D &&CO&&CO!;;IH%!U[[)999977>>*%LL2:,?@++J7ILL1#i.1ABCLL?@1} z8, 	>AJJqM-0-?-?-B*HdHd))+C$$T41=DAqJJqMTYY[3..M##Ha1=	> 
Q	-  ..z:!}z8, 	AJJqM"&"2"2(61q5/#;->*HdHd$$T41=DAqOOAq(A-F 3 7 7 9OOA	 	 (

	4	 		!	F $)8  %a+E!$ 
  ,$ 
 yy{LL-.)--/ESXPHiY!Xq8#AJJqMY&MJJqMY&MJJqMZ'MLL:8*EF##Ha1=LL8
CDMM(#JJx %99;-?LL!56((Y?#'99;L+ . 	!!#!!#GGOFFOOGGI6688SVVXXr   )r   r{   r}   r   multiprocessing.poolr   r   r   queuer   r   r>   numpyrG   r   faiss.contrib.inspect_toolsr   r   r   r   r   r   r   r   r   <module>r     s      	  +        3X  X v5 5t  33IIYr   