
    rh!                     (   d dl Z d dlmZ d dlmZmZmZ ddlmZm	Z	m
Z
mZmZmZ ddlmZmZ  e       rd dlmZ dd	lmZ  e
       r
d dlZdd
lmZ  e	       rddlmZ ddlmZ  ej8                  e      Z e ed             G d de             Zy)    N)UserDict)AnyUnionoverload   )add_end_docstringsis_tf_availableis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)Image)
load_image)6MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)9TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES)stable_softmaxT)has_image_processorc                   X    e Zd ZdZdZdZdZdZ fdZe	de
edf   dee   ded	eeeef      fd
       Ze	de
ee   ed   f   dee   ded	eeeeef         fd       Zde
eee   ded   f   dee   ded	e
eeeef      eeeeef         f   f fdZddZ	 	 	 	 ddZd Zd Z xZS )#ZeroShotImageClassificationPipelineaL  
    Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
    provide an image and a set of `candidate_labels`.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    [{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]

    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["black and white", "photorealist", "painting"],
    ... )
    [{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-image-classification"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
    FTc                     t        |   di | t        | d       | j                  | j                  dk(  rt
               y t               y )Nvisiontf )super__init__r   check_model_type	frameworkr   r   )selfkwargs	__class__s     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/pipelines/zero_shot_image_classification.pyr   z,ZeroShotImageClassificationPipeline.__init__H   sH    "6"$)~~% F	
 H	
    imagezImage.Imagecandidate_labelsr"   returnc                      y Nr   r!   r&   r'   r"   s       r$   __call__z,ZeroShotImageClassificationPipeline.__call__R   s      #r%   c                      y r*   r   r+   s       r$   r,   z,ZeroShotImageClassificationPipeline.__call__W   s     &)r%   c                 l    d|v r|j                  d      }|t        d      t        |   |fd|i|S )a  
        Assign labels to the image(s) passed as inputs.

        Args:
            image (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a http link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

            candidate_labels (`list[str]`):
                The candidate labels for this image. They will be formatted using *hypothesis_template*.

            hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}"`):
                The format used in conjunction with *candidate_labels* to attempt the image classification by
                replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are
                already formatted.

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries containing one entry per proposed label. Each dictionary contains the
            following keys:
            - **label** (`str`) -- One of the suggested *candidate_labels*.
            - **score** (`float`) -- The score attributed by the model to that label. It is a value between
                0 and 1, computed as the `softmax` of `logits_per_image`.
        imageszSCannot call the zero-shot-image-classification pipeline without an images argument!r'   )pop
ValueErrorr   r,   )r!   r&   r'   r"   r#   s       r$   r,   z,ZeroShotImageClassificationPipeline.__call__\   sH    J vJJx(E=rsswS8HSFSSr%   c                     i }d|v r|d   |d<   d|v r|d   |d<   d|v r|d   |d<   |t        j                  dt               ||d<   |i i fS )Nr'   timeouthypothesis_templatez^The `tokenizer_kwargs` argument is deprecated and will be removed in version 5 of Transformerstokenizer_kwargs)warningswarnFutureWarning)r!   r5   r"   preprocess_paramss       r$   _sanitize_parametersz8ZeroShotImageClassificationPipeline._sanitize_parameters   s    '4:;M4N01+1)+<i( F*7=>S7T34'MMp 5E01 "b((r%   c                    |i }t        ||      }| j                  |g| j                        }| j                  dk(  r|j                  | j                        }||d<   |D cg c]  }|j                  |       }}ddi}	d| j                  j                  j                  v r|	j                  dd	d
       |	j                  |        | j                  |fd| j                  i|	}
|
g|d<   |S c c}w )N)r3   )r/   return_tensorsptr'   paddingTsiglip
max_length@   )r>   r@   
truncationr<   text_inputs)r   image_processorr    totorch_dtypeformatmodelconfig
model_typeupdate	tokenizer)r!   r&   r'   r4   r3   r5   inputsx	sequencestokenizer_default_kwargsrC   s              r$   
preprocessz.ZeroShotImageClassificationPipeline.preprocess   s     #!5'2%%eWT^^%T>>T!YYt//0F%5!"<LMq(//2M	M$-t#4 tzz((333$++LR\`+a ''(89$dnnYjt~~jQij!,} Ns   #C/c                     |j                  d      }|j                  d      }t        |d   t              r|d   }n|d   d   } | j                  di ||}||j                  d}|S )Nr'   rC   r   )r'   logitsr   )r0   
isinstancer   rH   logits_per_image)r!   model_inputsr'   rC   outputsmodel_outputss         r$   _forwardz,ZeroShotImageClassificationPipeline._forward   s    '++,>?"&&}5k!nh/%a.K &a.+K$**;{;l; !1..
 r%   c                    |j                  d      }|d   d   }| j                  dk(  rjd| j                  j                  j                  v rHt        j                  |      j                  d      }|j                         }t        |t              s|g}n| j                  dk(  rE|j                  d      j                  d      }|j                         }t        |t              sW|g}nS| j                  dk(  r,t        |d	      }|j                         j                         }nt        d
| j                         t        t!        ||      d       D cg c]
  \  }}||d }}}|S c c}}w )Nr'   rS   r   r=   r?   )dimr   )axiszUnsupported framework: c                     | d    S )Nr   r   )rN   s    r$   <lambda>zAZeroShotImageClassificationPipeline.postprocess.<locals>.<lambda>   s    _`ab_c^c r%   )key)scorelabel)r0   r    rH   rI   rJ   torchsigmoidsqueezetolistrT   listsoftmaxr   numpyr1   sortedzip)	r!   rX   r'   rS   probsscoresra   candidate_labelresults	            r$   postprocessz/ZeroShotImageClassificationPipeline.postprocess   sF   (,,-?@x(+>>T!h$**2C2C2N2N&NMM&)11"5E\\^Ffd+ ^^t#NNrN*2226E\\^Ffd+ ^^t#"63E[[]))+F6t~~6FGHH +1V=M1NTc*d
& o6
 
 	
s   E)r*   )NzThis is a photo of {}.NN)__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r   r   strrg   r   dictr,   r:   rQ   rY   rp   __classcell__)r#   s   @r$   r   r   !   sj   @ O #O
 #3-.#BFs)#WZ#	d38n	# # )49d=&99:)NRSVi)cf)	d4S>"	#) ))TS$s)]D4GGH)T s))T 	)T
 
tDcN#T$tCH~*>%??	@)TV)( 40"r%   r   ) r6   collectionsr   typingr   r   r   utilsr   r	   r
   r   r   r   baser   r   PILr   image_utilsr   rc   models.auto.modeling_autor   models.auto.modeling_tf_autor   tf_utilsr   
get_loggerrq   loggerr   r   r%   r$   <module>r      s       ' '  5 (bh)			H	% ,FGv( v Hvr%   