
import inspect
from typing import Union

import numpy as np

from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, logging
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args


logger = logging.get_logger(__name__)


class ZeroShotClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
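
    For example (an illustrative sketch using the pipeline's default template), the template `"This example is {}."`
    and the labels `["urgent", "not urgent"]` expand a single sequence into the pairs
    `[sequence, "This example is urgent."]` and `[sequence, "This example is not urgent."]`.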
    """

    def _parse_labels(self, labels):
        if isinstance(labels, str):
            labels = [label.strip() for label in labels.split(",") if label.strip()]
        return labels

    def __call__(self, sequences, labels, hypothesis_template):
        if len(labels) == 0 or len(sequences) == 0:
            raise ValueError("You must include at least one label and at least one sequence.")
        if hypothesis_template.format(labels[0]) == hypothesis_template:
            raise ValueError(
                f'The provided hypothesis_template "{hypothesis_template}" was not able to be formatted with the target labels. '
                "Make sure the passed template includes formatting syntax such as {} where the label should go."
            )

        if isinstance(sequences, str):
            sequences = [sequences]

        sequence_pairs = []
        for sequence in sequences:
            # Pair every sequence with every hypothesis built from the candidate labels.
            sequence_pairs.extend([[sequence, hypothesis_template.format(label)] for label in labels])

        return sequence_pairs, sequences


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class ZeroShotClassificationPipeline(ChunkPipeline):
    """
    NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
    language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
    hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
    **much** more flexible.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
    label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
    config's [`~transformers.PretrainedConfig.label2id`] mapping.
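    (Many NLI checkpoints, for instance, use a mapping like `{"contradiction": 0, "neutral": 1, "entailment": 2}`;
    the exact ids depend on the checkpoint.)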

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="facebook/bart-large-mnli")
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["english", "german"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
    ```
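
    A minimal sketch of the other call-time options (the candidate labels, the hypothesis template and multi-label
    scoring); the expected output is omitted because the scores depend on the checkpoint:

    ```python
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent"],
    ...     hypothesis_template="The urgency of this message is {}.",
    ...     multi_label=True,
    ... )  # doctest: +SKIP
    ```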

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-classification"`.

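    For instance (a hedged sketch; any NLI checkpoint can be substituted for the one shown):

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    ```
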
    The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
    of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    """

    _load_processor = False
    _load_image_processor = False
    _load_feature_extractor = False
    _load_tokenizer = True

    def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
        self._args_parser = args_parser
        super().__init__(*args, **kwargs)
        if self.entailment_id == -1:
            logger.warning(
                "Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to "
                "-1. Define a descriptive label2id mapping in the model config to ensure correct outputs."
            )

    @property
    def entailment_id(self):
        # Return the id of the label whose name starts with "entail" (e.g. "entailment" or "ENTAILMENT").
        for label, ind in self.model.config.label2id.items():
            if label.lower().startswith("entail"):
                return ind
        return -1

    def _parse_and_tokenize(
        self, sequence_pairs, padding=True, add_special_tokens=True, truncation=TruncationStrategy.ONLY_FIRST, **kwargs
    ):
        """
        Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        """
        return_tensors = self.framework
        if self.tokenizer.pad_token is None:
            # Override for tokenizers that do not define a padding token.
            logger.error(
                "Tokenizer was not supporting padding necessary for zero-shot, attempting to use "
                " `pad_token=eos_token`"
            )
            self.tokenizer.pad_token = self.tokenizer.eos_token
        try:
            inputs = self.tokenizer(
                sequence_pairs,
                add_special_tokens=add_special_tokens,
                return_tensors=return_tensors,
                padding=padding,
                truncation=truncation,
            )
        except Exception as e:
            if "too short" in str(e):
                # Some tokenizers complain about truncating to a length the input never reaches;
                # in that case, retry without truncation.
                inputs = self.tokenizer(
                    sequence_pairs,
                    add_special_tokens=add_special_tokens,
                    return_tensors=return_tensors,
                    padding=padding,
                    truncation=TruncationStrategy.DO_NOT_TRUNCATE,
                )
            else:
                raise e

        return inputs

    def _sanitize_parameters(self, **kwargs):
        if kwargs.get("multi_class", None) is not None:
            kwargs["multi_label"] = kwargs["multi_class"]
            logger.warning(
                "The `multi_class` argument has been deprecated and renamed to `multi_label`. `multi_class` will be "
                "removed in a future version of Transformers."
            )
        preprocess_params = {}
        if "candidate_labels" in kwargs:
            preprocess_params["candidate_labels"] = self._args_parser._parse_labels(kwargs["candidate_labels"])
        if "hypothesis_template" in kwargs:
            preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]

        postprocess_params = {}
        if "multi_label" in kwargs:
            postprocess_params["multi_label"] = kwargs["multi_label"]
        return preprocess_params, {}, postprocess_params

    def __call__(
        self,
        sequences: Union[str, list[str]],
        *args,
        **kwargs,
    ):
        """
        Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
        information.

        Args:
            sequences (`str` or `list[str]`):
                The sequence(s) to classify. Sequences longer than the model's maximum input length will be truncated.
            candidate_labels (`str` or `list[str]`):
                The set of possible class labels to classify each sequence into. Can be a single label, a string of
                comma-separated labels, or a list of labels.
            hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
                The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
                similar syntax for the candidate label to be inserted into the template. For example, the default
                template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
                model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
                works well in many cases, but it may be worthwhile to experiment with different templates depending on
                the task setting.
            multi_label (`bool`, *optional*, defaults to `False`):
                Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
                the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
                independent and probabilities are normalized for each candidate by doing a softmax of the entailment
                score vs. the contradiction score.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **sequence** (`str`) -- The sequence for which this is the output.
            - **labels** (`list[str]`) -- The labels sorted by order of likelihood.
            - **scores** (`list[float]`) -- The probabilities for each of the labels.
        r   r   rS   z%Unable to understand extra arguments )r   r   r/   r"   )r   r   r5   r6   r7   s       r   r"   z'ZeroShotClassificationPipeline.__call__   s^    H t9>Y!^ 2& @)-aF%&DTFKLLw	4V44r(   c              #      K   | j                  |||      \  }}t        t        ||            D ]6  \  }\  }}| j                  |g      }	||d   |t	        |      dz
  k(  d|	 8 y w)Nr   r   candidate_labelr!   is_last)r.   	enumerateziprO   r   )
r   rM   rS   r   r    r   ir[   sequence_pairmodel_inputs
             r   
preprocessz)ZeroShotClassificationPipeline.preprocess   s     $($5$5f>NPc$d!	3<SAQSa=b3c 	/A/22M?CK $3%aL$4 5 99 	 	s   A&A(c                 n   |d   }|d   }| j                   j                  D ci c]  }|||   
 }}| j                  dk(  r| j                  j                  n| j                  j
                  }dt        j                  |      j                  v rd|d<    | j                  di |}|||d   d|}|S c c}w )	Nr[   r!   pt	use_cacheFr\   rZ   r'   )	rG   model_input_namesrF   r:   forwardcallinspect	signature
parameters)	r   rM   r[   r!   kmodel_inputsmodel_forwardoutputsmodel_outputss	            r   _forwardz'ZeroShotClassificationPipeline._forward   s     !23*%.2nn.N.NO6!9OO.2nn.D

**$**//'++M:EEE(-L%$**,|,  / i(
 	
  Ps   B2c                    |D cg c]  }|d   	 }}|D cg c]  }|d   	 }}| j                   dk(  rCt        j                  |D cg c]#  }|d   j                         j	                         % c}      }n4t        j                  |D cg c]  }|d   j	                          c}      }|j
                  d   }t        |      }	||	z  }
|j                  |
|	df      }|st        |      dk(  r`| j                  }|dk(  rdnd}|d||gf   }t        j                  |      t        j                  |      j                  dd	
      z  }|d   }nM|d| j                  f   }t        j                  |      t        j                  |      j                  dd	
      z  }t        t        |d   j                                     }|d   |D cg c]  }||   	 c}|d|f   j                         dS c c}w c c}w c c}w c c}w c c}w )Nr[   r!   rd   logitsr   r-   r   .T)keepdims).r   )r!   r   scores)rF   npconcatenatefloatnumpyshaper   reshaper1   expsumlistreversedargsorttolist)r   rp   rR   ro   rS   r   outputrs   Nnnum_sequencesreshaped_outputsr1   contradiction_identail_contr_logitsru   entail_logitstop_indsr_   s                      r   postprocessz*ZeroShotClassificationPipeline.postprocess   s   FST7G$56TT8EFWWZ(F	F>>T!^^Ta$b&VH%5%;%;%=%C%C%E$bcF^^M$Z&VH%5%;%;%=$Z[FLLO !Q!>>=!R*@A#./14 ..M%2a%7rQ"239I=8Y3Y"ZVV/0266:M3N3R3RSU`d3R3eeFF^F -S$2D2D-DEMVVM*RVVM-B-F-FrTX-F-YYF!2!2!456!!4<=q'*=Q[)002
 	
1 UF$b$Z* >s   G-G2(G7
G<H)NzThis example is {}.)F)r#   r$   r%   r&   _load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r0   propertyr1   r   
ONLY_FIRSTrO   rW   r   r   r~   r"   rb   rq   r   __classcell__)r7   s   @r   r+   r+   ,   s    %N O!#O#H#J    '+tPbPmPm(T9$+5d3i(+5Z$
r(   r+   )ri   typingr   ry   rv   tokenization_utilsr   utilsr   r   baser	   r
   r   
get_loggerr#   r2   r   r+   r'   r(   r   <module>r      sd       3 / J J 
		H	%)O )< ,4@Ab
] b
 Bb
r(   
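

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the pipeline API): how `postprocess` turns
# NLI logits into label scores. The logit values and the label positions below
# are made up for the example; the real ids come from `model.config.label2id`.
#
#   import numpy as np
#
#   # (1 sequence, 2 candidate labels, [contradiction, neutral, entailment] logits)
#   logits = np.array([[[-2.1, 0.3, 3.4], [1.5, 0.2, -0.8]]])
#
#   # multi_label=False: softmax the entailment logits across the candidate labels.
#   entail = logits[..., 2]
#   scores = np.exp(entail) / np.exp(entail).sum(-1, keepdims=True)
#
#   # multi_label=True: softmax entailment vs. contradiction independently per label.
#   pair = logits[..., [0, 2]]
#   pair_scores = np.exp(pair) / np.exp(pair).sum(-1, keepdims=True)
#   scores = pair_scores[..., 1]  # probability mass assigned to "entailment"
# ---------------------------------------------------------------------------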