
    rht=                     :   d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Z
 G d de      Z	 d dlmZ d dlmZmZmZ dZdd
Z	 ddd	dddeeeeee	eeegef   f   f         fdZ ej2                  e      Z e        G d d             Zy# e$ r d	ZY Xw xY w)    N)Enum)AnyCallableOptionalUnionc                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
RequestStatusz5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailedN)__name__
__module____qualname____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED     m/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/utils/metrics.pyr	   r	   
   s*    ?GJ)7HHFr   r	   )metrics)Status
StatusCode
get_tracerTFc                 $     t         sd S  fd}|S )a  
    Decorator that attaches a tracer to a class.

    This decorator should be applied to classes that need OpenTelemetry tracing.
    It adds a tracer attribute to the class instance that can be used by the traced decorator.

    Args:
        tracer_name_template: Optional template string for the tracer name.
            If provided, it should contain {module} which will be replaced with the class's full module path
            and {class_name} for the class name.
            If None, a default naming scheme will be used where:
              - If the module already starts with "transformers.", it will use that directly
              - Otherwise, it will prepend "transformers." to the module name

    Returns:
        Class decorator function
    c                     | S Nr   )clss    r   <lambda>zattach_tracer.<locals>.<lambda>2   s    3 r   c                 p      j                   t        j                         fd       }| _          S )Nc                      | g|i | j                   }j                  }"|j                  d      r| d| }nd| d| }nj                  ||      }t	        |      | _        y )Ntransformers..)module
class_name)r   r   
startswithformatr"   tracer)	selfargskwargsmodule_namer-   tracer_namer&   original_inittracer_name_templates	         r   init_with_tracerz:attach_tracer.<locals>.decorator.<locals>.init_with_tracer7   s    $000..K))J#+))/:%0M:,"?K$1+a
|"LK299Yc9d$[1DKr   )__init__	functoolswraps)r&   r8   r6   r7   s   ` @r   	decoratorz attach_tracer.<locals>.decorator4   s5    		'	2 
(	2  (
r   )_has_opentelemetry)r7   r<   s   ` r   attach_tracerr>      s    $ . r   )	span_name
standaloneadditional_attributesrA   c                .    fd}| |S  ||       S )a  
    Decorator to trace function calls with OpenTelemetry.

    Can be used as @traced or @traced(span_name="custom_name")

    Args:
        func: The function to trace
        span_name: Optional custom name for the span (defaults to function name)
        standalone: If True, creates a parentless span
        additional_attributes: Optional list of additional attributes to set on the span.
          Each item is a tuple of (instance_attribute_name, span_attribute_key, value_or_transform_function)
          where:
            - instance_attribute_name: Name of the attribute to get from the class instance
            - span_attribute_key: Key to use when setting the attribute on the span
            - value_or_transform_function: Either a raw value to use directly, or a function to transform
              the attribute value before setting it on the span

    Returns:
        Decorated function with tracing
    c                 \     t         s S dd l} |j                          fd       }|S )Nr   c            
      @   | rt        d      rj                  | d   nd }|d u}|rt        |d      r|j                  }n%t        dj                   dj
                         }xs j
                  }r|j                  n|j                  } ||      5 }|j                  dj
                         |j                  dj                         |j                  d|       | r{t        |       D ]m  \  }}	t        |	t        t        t        t        f      s|	|j                  d	| t        |	             G|j                  d	| t        t        |	                   o |r|j!                         D ]m  \  }
}t        |t        t        t        t        f      s||j                  d
|
 t        |             G|j                  d
|
 t        t        |                   o rP|rND ]I  }|\  }}}t        ||      st#        ||      }t%        |      r	 ||      }n|}|j                  ||       K 	  | i |}|cd d d        S # t&        $ r?}|j)                  t+        t,        j.                               |j1                  |        d }~ww xY w# 1 sw Y   y xY w)N__self__r   r0   r*   r+   zfunction.namezfunction.modulezfunction.is_methodzargs.zkwargs.)hasattrrE   r0   r"   r   r   
start_spanstart_as_current_spanset_attribute	enumerate
isinstancestrintfloatbooltypeitemsgetattrcallable	Exception
set_statusr    r!   ERRORrecord_exception)r2   r3   instance	is_methodr0   namespan_fnspaniargkeyvalueattr_configinstance_attribute_namespan_attribute_keyvalue_or_transform_functionattribute_valuetransformed_valueresulterA   funcr?   r@   s                       r   wrapperz*traced.<locals>.decorator.<locals>.wrapperp   s|   "&GD*,E$--JctAwjnH ,IWXx8!#mDOO3DAdmm_$UV-D+5f''6;W;WG #$""?DMMB""#4dooF""#7C"+D/ L3%cCeT+BCs{ ..qc{CHE ..qc{CS	NK	L
 &,lln R
U%ec3t-DE ..E
K ..DK@PQ	R )Y'< Vcn`/1CE`"8-DE.5h@W.XO'(CD4OP_4` 14O 1 ../ACTUV!4262F!?# #@ ! OOF:+;+;$<=))!,A# #s1   E%J?6J6	I			J:JJJJ)r=   r:   r;   )ri   r:   rj   rA   r?   r@   s   `  r   r<   ztraced.<locals>.decoratorj   s3    !K		.	 
.	` r   r   )ri   r?   r@   rA   r<   s    ``` r   tracedrk   N   s     87r |T?r   c                       e Zd ZdZdefdZd Zedede	ddfd	       Z
ed
eddfd       Zedd       Zedededdfd       Zedede	ddfd       Zy)ContinuousBatchProcessorMetricsz0Metrics collection for ContinuousBatchProcessor.max_batch_tokensc                 2    || _         | j                          y)zInitialize metrics for continuous batch processor.

        Args:
            max_batch_tokens: Maximum number of tokens in a batch
        N)rn   _setup_metrics)r1   rn   s     r   r9   z(ContinuousBatchProcessorMetrics.__init__   s     !1r   c                 J   t         st        j                  d       yt        j                  d      | _        g d}| j
                  j                  ddd|      | _        | j
                  j                  d	d
d      | _	        | j
                  j                  ddd      | _
        g d}| j
                  j                  ddd|      | _        | j
                  j                  ddd      | _        | j
                  j                  ddd      | _        | j
                  j                  ddd      | _        g d}| j
                  j                  ddd|      | _        | j
                  j                  ddd       | _        | j
                  j                  d!d"d       | _        y)#zIInitialize OpenTelemetry metrics and tracing if the library is available.zIOpenTelemetry is not installed. Metrics and tracing will not be recorded.Nz2transformers.generation.continuous_batch_processor)
      2   K   d         i,    i        '  ttft_millisecondsz#Time to first token in millisecondsms)rZ   descriptionunit#explicit_bucket_boundaries_advisoryactive_requests_countz3Number of active requests currently being processedrequests)rZ   r   r   waiting_requests_countz*Number of requests waiting to be processed)rt   rv      ry   rz   r{   r|   r}   i N  i0u  i`  request_latency_millisecondsz9End-to-end latency for completed requests in millisecondsdecode_prefill_ratioz3Ratio of decode tokens to prefill tokens in a batchratioprefill_tokens_processedz"Number of prefill tokens processedtokensdecode_tokens_processedz!Number of decode tokens processed)   rr         (   rt   <   F   P   Z   _   b   rv   batch_fill_percentagez5Percentage of max_batch_tokens utilized in each batchpercentkv_cache_free_memory_bytesz/Free memory of the PagedAttentionCache in bytesbyteskv_cache_memory_bytesz0Memory usage of the PagedAttentionCache in bytes)r=   loggerinfor   	get_metermetercreate_histogramttft_histogramcreate_gaugeactive_requests_gaugewaiting_requests_gaugerequest_latency_histogramdecode_prefill_ratio_gaugecreate_counterprefill_tokens_counterdecode_tokens_counterbatch_fill_percentage_histogramkv_cache_free_memory_gaugekv_cache_memory_gauge)r1   ttft_bucketslatency_bucketsbatch_fill_bucketss       r   rp   z.ContinuousBatchProcessorMetrics._setup_metrics   s    "KKcd&&'[\
 _"jj99$=0<	 : 
 &*ZZ%<%<(M &= &
" '+jj&=&=)D '> '
# \)-)D)D/S0?	 *E *
& +/***A*A'M +B +
' '+jj&?&?+< '@ '
# &*ZZ%>%>*; &? &
" R/3zz/J/J(O0B	 0K 0
, +/***A*A-I +B +
' &*ZZ%<%<(J &= &
"r   created_time
request_idreturnNc                    t         syt        j                         |z
  dz  }	 | j                  j                  |       t        j                  d| d|dd       y# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zRecord Time to First Token (TTFT).

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        N     @@zRecorded TTFT for request : .2fr   zFailed to record TTFT metric: )r=   timer   recordr   debugrT   warning)r1   r   r   ttft_msrh   s        r   record_ttft_metricz2ContinuousBatchProcessorMetrics.record_ttft_metric
  s     "99;-7	A&&w/LL5j\GC=PRST 	ANN;A3?@@	A   8A 	B%BBrequests_in_batchc                    t         r|syd}d}|D ]j  }|j                  t        j                  k(  r|dz  }&|j                  t        j                  t        j
                  fv sS|t        |j                        z  }l ||z   }	 |dkD  r| j                  j                  |       |dkD  r| j                  j                  |       |dkD  r ||z  }| j                  j                  |       || j                  z  dz  }| j                  j                  |       t         j#                  d| d| d|dd	| d
| j                   d       y# t$        $ r"}t         j'                  d|        Y d}~yd}~ww xY w)zRecord metrics about the batch composition including decode/prefill ratio and batch fill percentage.

        Args:
            requests_in_batch: List of request states in the current batch
        Nr      g      Y@zBatch metrics: z decode tokens, z prefill tokens, batch fill: r   z% (/)z Failed to record batch metrics: )r=   statusr	   r   r   r   len
prompt_idsr   addr   r   setrn   r   r   r   r   rT   r   )	r1   r   decode_tokensprefill_tokensstatetotal_batch_tokensr   fill_percentagerh   s	            r   record_batch_metricsz4ContinuousBatchProcessorMetrics.record_batch_metrics  s    "):& 	8E||}555"-":":M<Z<Z![[#e&6&6"77		8 +^;	C!++//?q **..}=!%6//33E:1D4I4IIURO0077HLL!-0@@P Q.s337I6J!DLaLaKbbce  	CNN=aSABB	Cs   CE 	E1E,,E1c                    t         sy	 |j                  t        |j                        z
  }t        |j                        }|j
                  t        j                  t        j                  fv rdnd}||z  |j                  z  |j                  z  |j                  z  dz  |z  }|t        |j                        z  |j                  z  |j                  z  |j                  z  dz  |z  }| j                  j                  |       | j                  j                  |       t        j!                  d|dz  dd| d|j                   d	||j                  z  d
z  dd	       y# t"        $ r"}t        j%                  d|        Y d}~yd}~ww xY w)a&  Record memory usage of the PagedAttentionCache without GPU synchronization.

        This calculates the theoretical memory usage based on cache configuration
        and the number of blocks currently in use.

        Args:
            cache: The PagedAttentionCache object to measure
        N      zKV Cache memory: i   r   zMB, Used blocks: r   z (rv   z.1fz%)z*Failed to record KV cache memory metrics: )r=   
num_blocksr   _free_blocks	key_cachedtypetorchfloat16bfloat16
block_sizenum_key_value_headshead_dimr   r   r   r   r   rT   r   )r1   cachenum_used_blocks
num_layersbytes_per_parametermemory_bytesfree_memory_bytesrh   s           r   record_kv_cache_memory_metricsz>ContinuousBatchProcessorMetrics.record_kv_cache_memory_metricsF  s    "'	M#..U5G5G1HHOU__-J (-{{u}}enn6U'U![\
 !"""# ++, ..	!
  &&  e(()*""# ++, ..	!
  &&  &&**<8++//0ABLL#LK$@#E F  /0%2B2B1C D#e&6&66<SAE
  	MNNGsKLL	Ms   EE 	F
(FF
active_requestswaiting_requestsc                    t         sy	 | j                  j                  |       | j                  j                  |       t        j                  d| d| d       y# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zRecord metrics about active and waiting requests.

        Args:
            active_requests: Number of active requests
            waiting_requests: Number of waiting requests
        NzQueue metrics: z active requests, z waiting requestsz Failed to record queue metrics: )r=   r   r   r   r   r   rT   r   )r1   r   r   rh   s       r   record_queue_metricsz4ContinuousBatchProcessorMetrics.record_queue_metrics|  s     "	C&&**?;''++,<=LL??*;;MN^M__pqr 	CNN=aSABB	Cs   AA 	B%BBc                    t         syt        j                         |z
  dz  }	 | j                  j                  |       t        j                  d| d|dd       y# t        $ r"}t        j                  d|        Y d}~yd}~ww xY w)zRecord metrics about a completed request.

        Args:
            created_time: The time the request was created
            request_id: The ID of the request
        Nr   z Recorded request completion for r   r   r   z,Failed to record request completion metric: )r=   r   r   r   r   r   rT   r   )r1   r   r   
latency_msrh   s        r   record_request_completionz9ContinuousBatchProcessorMetrics.record_request_completion  s     "iikL0F:
	O**11*=LL;J<r*UXIYY[\] 	ONNI!MNN	Or   )r   N)r   r   r   r   rM   r9   rp   rk   rN   rL   r   listr   r   r   r   r   r   r   rm   rm      s    : O
b Au A# A$ A A$ &Cd &Ct &C &CP 3M 3Mj CC C3 CSW C C" Oe O OQU O Or   rm   r%   )r:   loggingr   enumr   typingr   r   r   r   r   r	   opentelemetryr   opentelemetry.tracer    r!   r"   r=   ImportErrorr>   r   tuplerL   rk   	getLoggerr   r   rm   r   r   r   <module>r      s        1 1 	D 	%BB
,` 
W _cW
 $DsCsHcUTWZDX?X9Y/Y)Z$[\Wt 
		8	$ tO tO tOc  s   B BB