
    rh=                    b    d Z ddlmZ ddlZddlZddlZddlmZ  G d d      Z G d d	e      Z	y)
a@  
This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.

Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.

Instead, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
    )annotationsN   )InputExamplec                  D    e Zd ZdZddddej
                  dddfdZdd	Zy
)STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             ~/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/readers/STSDataReader.py__init__zSTSDataReader.__init__   sF     -*$$" 0""    c           
        t         j                  j                  | j                  |      }|j	                  d      rt        j                  |dd      nt        |d      5 }t        j                  || j                  | j                        }g }t        |      D ]  \  }}t        || j                           }	| j                  r)|	| j                  z
  | j                   | j                  z
  z  }	|| j"                     }
|| j$                     }|j'                  t)        |t+        |      z   |
|g|	             |dkD  st-        |      |k\  s n d	d	d	       |S # 1 sw Y   S xY w)
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_exampleszSTSDataReader.get_examples0   s5   77<< 3 3X>   ' IIhv6h1	 ::cT^^T\\RDH$T? 
Cc$"4"456(("T^^38WXE))(SW2DRQSH\a bc!#H(E
	& '	& s   !CE=EEE#N)r   )__name__
__module____qualname____doc__r&   
QUOTE_NONEr   r8    r   r   r   r      s/     #,r   r   c                  H     e Zd ZdZddddej
                  dddf fd	Z xZS )	STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r
         r	   Tr   c
                6    t         
|   |||||||||		       y )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   zSTSBenchmarkDataReader.__init__N   s2     	)!!'- 	 
	
r   )r9   r:   r;   r<   r&   r=   r   __classcell__)rE   s   @r   r@   r@   I   s0     
 
r   r@   )
r<   
__future__r   r&   r$   r     r   r   r@   r>   r   r   <module>rI      s2    # 
  	 2 2j
] 
r   