
    rhZ                        d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ d	d
lmZmZmZ  e       rddlZ ej(                  e      ZdZ	 	 	 	 d/de	ee   df   dedee   fdZ e       r:edfdej8                  j:                  dedee   dej8                  j:                  fdZ	 	 	 	 d/dee   dedee   fdZ G d de      Z  G d de      Z! G d de      Z" G d de"      Z# G d de      Z$ G d de      Z% G d  d!e      Z& G d" d#e      Z' G d$ d%e      Z( G d& d'e      Z) G d( d)e      Z*d*dd*d*d	d*d*d*d*d+	Z+e$e"e#e!e%e&e'e(e)e*d,
Z,d-d-d-d-d-d.d-d-d-d-d,
Z-y)0zGLUE processors and helpers    N)asdict)Enum)OptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc                    t        j                  t        j                  d      t               t               r@t        | t        j                  j                        r|t        d      t        | |||      S t        | |||||      S )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr	   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r   s         t/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr%   )   st    2 MM%,,Z8-HZ"''//B<vww4XyU_fjkk-)
*bm     returnc                 $  
 t        |          }| D cg c]"  }|j                  |j                  |            $ } }t        | |||      
|dk(  rt        j
                  nt        j                  }
fd}|j                  }t        j                  j                  j                  |t        j                  |t        j                        |f|D 	ci c]  }	|	t	        j                  dg       c}	t	        j                  g       f      S c c}w c c}	w )zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        r   sts-bc               3      K   D ]H  } t        |       j                         D ci c]  \  }}|	|| }}}|j                  d      }||f J y c c}}w w)Nlabel)r   itemspop)exkvdr+   featuress        r$   genz2_tf_glue_convert_examples_to_features.<locals>.gen^   s\      !&,Rj&6&6&8JdaAMQTJJg%j !Js   "A
AA#AN)glue_processorstfds_mapget_example_from_tensor_dictr%   r   float32int64model_input_namesr   r    from_generatordictfromkeysint32TensorShape)r   r   r   r   	processorexample
label_typer3   input_namesr/   r2   s             @r$   r"   r"   N   s     $D)+	gop\cI&&y'M'Mg'VWpp4XyU_fjk#'7?RZZ
	!  11ww--]];1:>1<=Aa''=r~~b?QR
 	
 q >s   'DDc                    ||j                   }|`t        |          }|+|j                         }t        j	                  d| d|        $t
        |   t        j	                  d d|        t        |      D ci c]  \  }}||
 c}}dt        dt        t        t        d f   ffd}	| D 
cg c]
  }
 |	|
       }}
 || D 
cg c]  }
|
j                  |
j                  f c}
|dd	      }g }t        t        |             D ];  }|D ci c]  }|||   |    }}t        di |d
||   i}|j!                  |       = t        | d d       D ]W  \  }}
t        j	                  d       t        j	                  d|
j"                          t        j	                  d||           Y |S c c}}w c c}
w c c}
w c c}w )NzUsing label list z
 for task zUsing output mode r@   r'   c                     | j                   y dk(  r| j                      S dk(  rt        | j                         S t              )Nclassification
regression)r+   floatKeyError)r@   	label_mapr   s    r$   label_from_examplez>_glue_convert_examples_to_features.<locals>.label_from_example   sJ    == **W]]++L(''{##r&   r   T)r   padding
truncationr+      z*** Example ***zguid: z
features:  )model_max_lengthr4   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrG   text_atext_brangelenr   appendguid)r   r   r   r   r   r   r?   ir+   rJ   r@   labelsbatch_encodingr2   r/   inputsfeaturerI   s        `           @r$   r#   r#   m   s    //
#D)+	"--/JKK+J<z$HI+D1KKK,[MD6JK*3J*?@ha@I$L $U3t;K5L $ :BBg )BFB9ABg'..'..	)B	N H3x=! !3ABa!^A&q))BB:&:q	: 	!  !- 0
7%&fW\\N+,j!./0
 OA A C 	C Cs   F67F<GGc                       e Zd ZdZdZy)
OutputModerE   rF   N)__name__
__module____qualname__rE   rF   rN   r&   r$   rb   rb      s    %NJr&   rb   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y Nr?   super__init__r   r   r   r   r   selfargskwargs	__class__s      r$   rl   zMrpcProcessor.__init__   /    $)&))00=}Mr&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S See base class.idx	sentence1utf-8	sentence2r+   r   numpydecodestrrn   tensor_dicts     r$   r6   z*MrpcProcessor.get_example_from_tensor_dict   n    $$&$**,33G<$**,33G<G$**,-	
 	
r&   c                     t         j                  dt        j                  j	                  |d              | j                  | j                  t        j                  j	                  |d            d      S )ru   zLOOKING AT 	train.tsvtrain)rQ   rR   ospathjoin_create_examples	_read_tsvrn   data_dirs     r$   get_train_examplesz MrpcProcessor.get_train_examples   sQ    k"'',,x"E!FGH$$T^^BGGLL;4W%XZabbr&   c                     | j                  | j                  t        j                  j	                  |d            d      S ru   zdev.tsvdevr   r   r   r   r   r   s     r$   get_dev_exampleszMrpcProcessor.get_dev_examples   .    $$T^^BGGLL94U%VX]^^r&   c                     | j                  | j                  t        j                  j	                  |d            d      S ru   ztest.tsvtestr   r   s     r$   get_test_exampleszMrpcProcessor.get_test_examples   .    $$T^^BGGLL:4V%WY_``r&   c                 
    ddgS ru   01rN   rn   s    r$   rP   zMrpcProcessor.get_labels       Szr&   c           	          g }t        |      D ]F  \  }}|dk(  r| d| }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             H |S )5Creates examples for the training, dev and test sets.r   -r      r   Nr[   rV   rW   r+   rT   rZ   r   
rn   linesset_typer   r\   liner[   rV   rW   r+   s
             r$   r   zMrpcProcessor._create_examples   s~     ' 	`GAtAvZq$D!WF!WF$.DDGEOOLd6&X]^_	` r&   rc   rd   re   __doc__rl   r6   r   r   r   rP   r   __classcell__rq   s   @r$   rg   rg      s-    9N
c
_ar&   rg   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zMnliProcessor.__init__   rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )ru   rv   premiserx   
hypothesisr+   rz   r~   s     r$   r6   z*MnliProcessor.get_example_from_tensor_dict   sn    $$&	"((*11':%++-44W=G$**,-	
 	
r&   c                     | j                  | j                  t        j                  j	                  |d            d      S ru   r   r   r   r   s     r$   r   z MnliProcessor.get_train_examples   .    $$T^^BGGLL;4W%XZabbr&   c                     | j                  | j                  t        j                  j	                  |d            d      S )ru   zdev_matched.tsvdev_matchedr   r   s     r$   r   zMnliProcessor.get_dev_examples   s/    $$T^^BGGLLK\4]%^`mnnr&   c                     | j                  | j                  t        j                  j	                  |d            d      S )ru   ztest_matched.tsvtest_matchedr   r   s     r$   r   zMnliProcessor.get_test_examples   s/    $$T^^BGGLLK]4^%_aoppr&   c                 
    g dS )ru   )contradiction
entailmentneutralrN   r   s    r$   rP   zMnliProcessor.get_labels   s    99r&   c           	          g }t        |      D ]U  \  }}|dk(  r| d|d    }|d   }|d   }|j                  d      rdn|d   }	|j                  t        ||||	             W |S )	r   r   r      	   r   Nr   )rT   
startswithrZ   r   r   s
             r$   r   zMnliProcessor._create_examples   s     ' 	`GAtAvZqa	*D!WF!WF$//7DT"XEOOLd6&X]^_	` r&   r   r   s   @r$   r   r      s-    =N
coq:r&   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   z MnliMismatchedProcessor.__init__	  rr   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S )ru   zdev_mismatched.tsvdev_mismatchedr   r   s     r$   r   z(MnliMismatchedProcessor.get_dev_examples  s/    $$T^^BGGLLK_4`%acsttr&   c                     | j                  | j                  t        j                  j	                  |d            d      S )ru   ztest_mismatched.tsvtest_mismatchedr   r   s     r$   r   z)MnliMismatchedProcessor.get_test_examples  s/    $$T^^BGGLLK`4a%bduvvr&   )rc   rd   re   r   rl   r   r   r   r   s   @r$   r   r     s    HNuwr&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	ColaProcessorz/Processor for the CoLA data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zColaProcessor.__init__  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S ru   rv   sentencerx   Nr+   rz   r~   s     r$   r6   z*ColaProcessor.get_example_from_tensor_dict  U    $$&
#))+227;G$**,-	
 	
r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   z ColaProcessor.get_train_examples&  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zColaProcessor.get_dev_examples*  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zColaProcessor.get_test_examples.  r   r&   c                 
    ddgS r   rN   r   s    r$   rP   zColaProcessor.get_labels2  r   r&   c           	          |dk(  }|r|dd }|rdnd}g }t        |      D ]8  \  }}| d| }||   }	|rdn|d   }
|j                  t        ||	d|
             : |S )r   r   r   Nr   r   r   r   )rn   r   r   	test_mode
text_indexr   r\   r   r[   rV   r+   s              r$   r   zColaProcessor._create_examples6  s    &	!"IE#Q
 ' 	^GAtZq$D*%F%D47EOOLd6$V[\]		^
 r&   r   r   s   @r$   r   r     s-    9N
c_ar&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zSst2Processor.__init__H  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S r   rz   r~   s     r$   r6   z*Sst2Processor.get_example_from_tensor_dictL  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   z Sst2Processor.get_train_examplesU  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zSst2Processor.get_dev_examplesY  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zSst2Processor.get_test_examples]  r   r&   c                 
    ddgS r   rN   r   s    r$   rP   zSst2Processor.get_labelsa  r   r&   c           	          g }|dk(  rdnd}t        |      D ]A  \  }}|dk(  r| d| }||   }|dk(  rdn|d   }	|j                  t        ||d|	             C |S )r   r   r   r   r   Nr   r   )
rn   r   r   r   r   r\   r   r[   rV   r+   s
             r$   r   zSst2Processor._create_examplese  s    "f,Q!
 ' 	^GAtAvZq$D*%F$.DDGEOOLd6$V[\]	^ r&   r   r   s   @r$   r   r   E  s-    :N
c_ar&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	StsbProcessorz0Processor for the STS-B data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zStsbProcessor.__init__v  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rt   rz   r~   s     r$   r6   z*StsbProcessor.get_example_from_tensor_dictz  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   z StsbProcessor.get_train_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zStsbProcessor.get_dev_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zStsbProcessor.get_test_examples  r   r&   c                     dgS )ru   NrN   r   s    r$   rP   zStsbProcessor.get_labels  s	    vr&   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S )	r   r   r      r   r   Nr   r   r   r   s
             r$   r   zStsbProcessor._create_examples       ' 	`GAtAvZqa	*D!WF!WF$.DDHEOOLd6&X]^_	` r&   r   r   s   @r$   r   r   s  s-    :N
c_ar&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QqpProcessorz.Processor for the QQP data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zQqpProcessor.__init__  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )ru   rv   	question1rx   	question2r+   rz   r~   s     r$   r6   z)QqpProcessor.get_example_from_tensor_dict  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zQqpProcessor.get_train_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zQqpProcessor.get_dev_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zQqpProcessor.get_test_examples  r   r&   c                 
    ddgS r   rN   r   s    r$   rP   zQqpProcessor.get_labels  r   r&   c           	          |dk(  }|rdnd}|rdnd}g }t        |      D ]G  \  }}|dk(  r| d|d    }		 ||   }
||   }|rdn|d	   }|j                  t        |	|
||
             I |S # t        $ r Y Ww xY w)r   r   r   r      r   r   r   NrM   r   )rT   
IndexErrorrZ   r   )rn   r   r   r   q1_indexq2_indexr   r\   r   r[   rV   rW   r+   s                r$   r   zQqpProcessor._create_examples  s    &	!1q!1q ' 
	`GAtAvZqa	*Dhh )tAw OOLd6&X]^_
	`   s   A++	A76A7r   r   s   @r$   r   r     s-    8N
c_ar&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QnliProcessorz/Processor for the QNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zQnliProcessor.__init__  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )ru   rv   questionrx   r   r+   rz   r~   s     r$   r6   z*QnliProcessor.get_example_from_tensor_dict  sn    $$&
#))+227;
#))+227;G$**,-	
 	
r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   z QnliProcessor.get_train_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zQnliProcessor.get_dev_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zQnliProcessor.get_test_examples  r   r&   c                 
    ddgS ru   r   not_entailmentrN   r   s    r$   rP   zQnliProcessor.get_labels      .//r&   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S 	r   r   r   r   r   r   Nr   r   r   r   s
             r$   r   zQnliProcessor._create_examples  r   r&   r   r   s   @r$   r   r     s-    9N
c_a0r&   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	RteProcessorz.Processor for the RTE data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zRteProcessor.__init__  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rt   rz   r~   s     r$   r6   z)RteProcessor.get_example_from_tensor_dict
  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zRteProcessor.get_train_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zRteProcessor.get_dev_examples  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zRteProcessor.get_test_examples  r   r&   c                 
    ddgS r  rN   r   s    r$   rP   zRteProcessor.get_labels  r  r&   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r  r   r   s
             r$   r   zRteProcessor._create_examples#  r   r&   r   r   s   @r$   r  r    s-    8N
c_a0r&   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	WnliProcessorz/Processor for the WNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y ri   rj   rm   s      r$   rl   zWnliProcessor.__init__4  rr   r&   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rt   rz   r~   s     r$   r6   z*WnliProcessor.get_example_from_tensor_dict8  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   z WnliProcessor.get_train_examplesA  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zWnliProcessor.get_dev_examplesE  r   r&   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r$   r   zWnliProcessor.get_test_examplesI  r   r&   c                 
    ddgS r   rN   r   s    r$   rP   zWnliProcessor.get_labelsM  r   r&   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r  r   r   s
             r$   r   zWnliProcessor._create_examplesQ  r   r&   r   r   s   @r$   r  r  1  s-    9N
c_ar&   r  r   )	colamnlimrpcsst-2r)   qqpqnlirtewnli)
r  r  zmnli-mmr  r  r)   r  r  r  r   rE   rF   )NNNN).r   r   r   dataclassesr   enumr   typingr   r   tokenization_utilsr   utilsr	   r
   r   r   r   
tensorflowr   
get_loggerrc   rQ   r   listrU   r%   r}   r   r    r"   r#   rb   rg   r   r   r   r   r   r   r   r  r  glue_tasks_num_labelsr4   rS   rN   r&   r$   <module>r*     s    " 	    " 5 - = = 			H	%m  !%	 D&(99: "   F 
 $(	
''//
&
 SM	

 

D !%	4< 4"4 4n 
,M ,^+M +\wm w ,M ,^+M +\+M +\1= 1h+M +\+= +\+M +^ 
  &  r&   