
    rh#/                    4   d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
  ej                  e      Zerd dlmZmZmZ 	 d dlmZ 	 d dlmZ 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
Zy# e$ r Y Ew xY w# e$ r Y Iw xY w)    )annotationsN)TYPE_CHECKINGLiteral)save_or_push_to_hub_model)disable_datasets_cachingis_datasets_availableCrossEncoderSentenceTransformerSparseEncoder)OVQuantizationConfig)QuantizationConfigc                   ddl m}m}m} 	 ddlm}	m}
 ddlm} t        | |      xr7 t        |       xr* t        | d   d      xr t        | d   j                  |	      }t        | |      xr7 t        |       xr* t        | d   d      xr t        | d   j                  |	      }t        | |      xr t        | j                  |	      }|s|s|st        d      |s|r| d   j                  }n| j                  }|
j!                  |      t        t"              rTdvrt        d	      d
d
 } t%        |      d      xs) j&                  j(                  j+                          d| 'j&                  j(                  j+                          dt-        fdd|||d| 	       y
# t        $ r t        d      w xY w)a  
    Export a quantized ONNX model from a SentenceTransformer, SparseEncoder, or CrossEncoder model.

    This function applies dynamic quantization, i.e. without a calibration dataset.
    Each of the default quantization configurations quantize the model to int8, allowing
    for faster inference on CPUs, but are likely slower on GPUs.

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | SparseEncoder | CrossEncoder): The SentenceTransformer, SparseEncoder,
            or CrossEncoder model to be quantized. Must be loaded with `backend="onnx"`.
        quantization_config (QuantizationConfig): The quantization configuration.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the quantized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer, SparseEncoder, or CrossEncoder
            model loaded with `backend="onnx"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r	   )ORTModelORTQuantizer)AutoQuantizationConfigzPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`
auto_modelz}The model must be a Transformer-based SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="onnx"`.)arm64avx2avx512avx512_vnnizqquantization_config must be an QuantizationConfig instance or one of 'arm64', 'avx2', 'avx512', or 'avx512_vnni'.NF)	is_static_
_quantizedc                ,    j                  |       S )N)file_suffixquantize)save_dirr   quantization_config	quantizers    y/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/backend/quantize.py<lambda>z5export_dynamic_quantized_onnx_model.<locals>.<lambda>p   s    ););<OQYgr);)s     #export_dynamic_quantized_onnx_modelonnx	export_functionexport_function_nameconfigmodel_name_or_pathpush_to_hub	create_prr   backendmodel)sentence_transformersr
   r   r   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelenhasattrr   r/   
ValueErrorfrom_pretrainedstrgetattrweights_dtypenamelowerr   )r/   r    r+   r,   r-   r   r
   r   r   r   r   r   viable_st_modelviable_se_modelviable_ce_model	ort_modelquantization_config_namer!   s    `   `           @r"   r%   r%      s    L WV
>L 	5-. 	6J	6E!Hl+	6 uQx**H5	  	5-( 	6J	6E!Hl+	6 uQx**H5	  !5[*U[[RZ:[O/ L
 	
 /#Ah11	#kk	,,Y7I%s+&PP D  $7q#9 Rg&<>QR]bc!s(;(I(I(N(N(T(T(V'WWXYqXr%s,::??EEGH
SsB"-
Y  
8
 	

s   F4 4G	c
                    ddl m}
m}m} 	 ddlm}m}m} ddlm	} t               st        d      t         |      xr7 t               xr* t         d   d      xr t         d   j                  |      }t         |      xr7 t               xr* t         d   d      xr t         d   j                  |      }t         |
      xr t         j                   |      }|s|s|st#        d      |s|r d   j                  }n j                   }| |       } ||
      |j%                  |      t'        d |||fD              r!t)        d |||fD              st#        d       fd||nd}||nd}||nd}ndt+               5  j-                  ||fd||j.                  nd|      d	d	d	       t1        fdd|||||	d 	       y	# t        $ r t        d      w xY w# 1 sw Y   ;xY w)aw	  
    Export a quantized OpenVINO model from a SentenceTransformer, SparseEncoder, or CrossEncoder model.

    This function applies Post-Training Static Quantization (PTQ) using a calibration dataset, which calibrates
    quantization constants without requiring model retraining. Each default quantization configuration converts
    the model to int8 precision, enabling faster inference while maintaining accuracy.

    See the following pages for more information & benchmarks:

    - `Sentence Transformer > Usage > Speeding up Inference <https://sbert.net/docs/sentence_transformer/usage/efficiency.html>`_
    - `Cross Encoder > Usage > Speeding up Inference <https://sbert.net/docs/cross_encoder/usage/efficiency.html>`_

    Args:
        model (SentenceTransformer | SparseEncoder | CrossEncoder): The SentenceTransformer, SparseEncoder,
            or CrossEncoder model to be quantized. Must be loaded with `backend="openvino"`.
        quantization_config (OVQuantizationConfig | dict | None): The quantization configuration. If None, default values are used.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        dataset_name(str, optional): The name of the dataset to load for calibration.
            If not specified, the `sst2` subset of the `glue` dataset will be used by default.
        dataset_config_name (str, optional): The specific configuration of the dataset to load.
        dataset_split (str, optional): The split of the dataset to load (e.g., 'train', 'test'). Defaults to None.
        column_name (str, optional): The column name in the dataset to use for calibration. Defaults to None.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str, optional): The suffix to add to the quantized model file name. Defaults to `qint8_quantized`.

    Raises:
        ImportError: If the required packages `optimum` and `openvino` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer, SparseEncoder, or CrossEncoder model
            loaded with `backend="openvino"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r	   )OVConfigr   OVQuantizer)OVModelzPlease install datasets, optimum-intel and openvino to use this function. You can install them with pip: `pip install datasets optimum[openvino]`zaPlease install datasets to use this function. You can install it with pip: `pip install datasets`r   zThe model must be a Transformer-based SentenceTransformer, SparseEncoder, or CrossEncoder model loaded with `backend="openvino"`.N)r    c              3  $   K   | ]  }|d u 
 y wN .0params     r"   	<genexpr>z9export_static_quantized_openvino_model.<locals>.<genexpr>   s     
j5
j   c              3  $   K   | ]  }|d u 
 y wrH   rI   rJ   s     r"   rM   z9export_static_quantized_openvino_model.<locals>.<genexpr>   s      w#TwrN   zEither specify all of `dataset_name`, `dataset_config_name`, `dataset_split`, and `column_name`, or leave them all unspecified.c                .    j                  | ddd      S )N
max_lengthi  T)paddingrQ   
truncation)	tokenizer)examplesr/   s    r"   preprocess_functionzCexport_static_quantized_openvino_model.<locals>.preprocess_function   s    x#Z^__r$   gluesst2trainsentencec                     |          S rH   rI   )rU   column_namerV   s    r"   r#   z8export_static_quantized_openvino_model.<locals>.<lambda>   s    1DXkEZ1[ r$   i,  )dataset_namedataset_config_namerV   num_samplesdataset_splitc                ,    j                  |       S )N)save_directory	ov_configr   )r   calibration_datasetrc   r!   s    r"   r#   z8export_static_quantized_openvino_model.<locals>.<lambda>   s    ););I *< *
 r$   &export_static_quantized_openvino_modelopenvinor'   )r0   r
   r   r   optimum.intel.openvinorD   r   rE   optimum.intel.openvino.modelingrF   r3   r   r4   r5   r6   r   r/   r7   r8   anyallr   get_calibration_datasetr_   r   )r/   r    r+   r]   r^   r`   r\   r,   r-   r   r
   r   r   rD   r   rE   rF   r>   r?   r@   ov_modelrd   rc   rV   r!   s   `     `              @@@@r"   re   re   |   s~   ^ WV
	
 	

 	< !"o
 	

 	5-. 	5J	5E!Hl+	5 uQx**G4	  	5-( 	5J	5E!Hl+	5 uQx**G4	  !5Z*U[[RY:ZO/ P
 	
 /!!H//!KK"24-@AI++H5I

j<9Lm]h*i
jjsv w(46I=Ze'fw t  N
 	
` $0#;<L1D1P-V\%2%>MGM!,!8+jK	!	# 
'??% 3 [;N;Z+77`c' @ 

 
 F"-}  
V
 	

j
 
s   G" )G:"G7:H)FFN)r/   2SentenceTransformer | SparseEncoder | CrossEncoderr    zFQuantizationConfig | Literal['arm64', 'avx2', 'avx512', 'avx512_vnni']r+   r9   r,   boolr-   rn   r   
str | NonereturnNone)NNNNFFqint8_quantized)r/   rm   r    z"OVQuantizationConfig | dict | Noner+   r9   r]   ro   r^   ro   r`   ro   r\   ro   r,   rn   r-   rn   r   r9   rp   rq   )
__future__r   loggingtypingr   r   #sentence_transformers.backend.utilsr   sentence_transformers.utilr   r   	getLogger__name__loggerr0   r
   r   r   optimum.intelr   r3   r2   r   r%   re   rI   r$   r"   <module>r|      sT   "  ) I V			8	$VV6H "a=a_a a 	a
 a a 
aP  $&* $"(B=B;B B 	B
 $B B B B B B 
BY    s$    B B BBBB