
    rh6/                         d dl mZmZmZ d dlZddlmZmZm	Z	m
Z
 ddlmZmZmZmZ  e       r
d dlZddlmZ  e	       rd dlZ e
j*                  e      Z e ed	      d
       G d de             Zy)    )AnyUnionoverloadN   )add_end_docstringsis_tf_availableis_torch_availablelogging   )GenericTensorPipelinePipelineExceptionbuild_pipeline_init_args)stable_softmaxT)has_tokenizeraO  
        top_k (`int`, *optional*, defaults to 5):
            The number of predictions to return.
        targets (`str` or `list[str]`, *optional*):
            When passed, the model will limit the scores to the passed targets instead of looking up in the whole
            vocab. If the provided targets are not in the model vocab, they will be tokenized and the first resulting
            token will be used (with a warning, and that might be slower).
        tokenizer_kwargs (`dict`, *optional*):
            Additional dictionary of keyword arguments passed along to the tokenizer.c                   r    e Zd ZdZdZdZdZ	 dedej                  fdZ
dedej                  fdZdefdZ	 ddeeef   fd	Zd
 ZddZddZddZedededeeeef      fd       Zedee   dedeeeeef         fd       Zdeeee   f   dedeeeeef      eeeeef         f   f fdZ xZS )FillMaskPipelineFT	input_idsreturnc                 ,   | j                   dk(  r<t        j                  || j                  j                  k(        j                         }|S | j                   dk(  r0t        j                  || j                  j                  k(  d      }|S t        d      )NtfptFas_tuplezUnsupported framework)		frameworkr   where	tokenizermask_token_idnumpytorchnonzero
ValueError)selfr   masked_indexs      s/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/pipelines/fill_mask.pyget_masked_indexz!FillMaskPipeline.get_masked_index\   s    >>T!88I1M1M$MNTTVL
 	 ^^t# ==dnn6R6R)R]bcL  455    c                     | j                  |      }t        j                  |j                        }|dk  r9t	        d| j
                  j                  d| j                  j                   d      y )Nr   	fill-maskzNo mask_token (z) found on the input)	r&   npprodshaper   modelbase_model_prefixr   
mask_token)r#   r   r$   numels       r%   _ensure_exactly_one_mask_tokenz/FillMaskPipeline._ensure_exactly_one_mask_tokene   sg    ,,Y7**+19#

,,!$..";";!<<PQ  r'   model_inputsc                     t        |t              r|D ]  }| j                  |d   d           y |d   D ]  }| j                  |        y )Nr   r   )
isinstancelistr1   )r#   r2   model_inputr   s       r%   ensure_exactly_one_mask_tokenz.FillMaskPipeline.ensure_exactly_one_mask_tokeno   sY    lD)+ Q33K4LQ4OPQ *+6 ?	33I>?r'   c                 v    || j                   }|i } | j                  |fd|i|}| j                  |       |S )Nreturn_tensors)r   r   r7   )r#   inputsr9   tokenizer_kwargspreprocess_parametersr2   s         r%   
preprocesszFillMaskPipeline.preprocessw   sN     !!^^N#!%t~~f`^`O_`**<8r'   c                 :     | j                   di |}|d   |d<   |S )Nr    )r-   )r#   r2   model_outputss      r%   _forwardzFillMaskPipeline._forward   s*    "

2\2%1+%>k"r'   c                     |!|j                   d   |k  r|j                   d   }|d   d   }|d   }| j                  dk(  rt        j                  || j                  j
                  k(        j                         d d df   }|j                         }|d|d d f   }t        |d      }|Pt        j                  t        j                  |d      |j                  dd            }t        j                  |d      }t        j                  j                  ||      }	|	j                  j                         |	j                  j                         }}
nvt!        j"                  || j                  j
                  k(  d	
      j                  d      }|d|d d f   }|j%                  d      }||d|f   }|j'                  |      \  }
}g }|
j                   d   dk(  }t)        t+        |
j-                         |j-                                     D ]  \  }\  }}g }t+        ||      D ]  \  }}|j                         j/                         }|||   j-                         }||||   <   |t1        j                  || j                  j2                  k7           }| j                  j5                  ||      }||| j                  j5                  |g      |d}|j7                  |        |j7                  |        |r|d   S |S )Nr   r   logitsr   )axisr   )kFr   )dim.)skip_special_tokens)scoretoken	token_strsequence)r,   r   r   r   r   r   r   r   	gather_ndsqueezereshapeexpand_dimsmathtop_kvaluesindicesr    r!   softmaxtopk	enumerateziptolistcopyr*   pad_token_iddecodeappend)r#   r@   rR   
target_idsr   outputsr$   rC   probsrV   rS   predictionsresultsingle_maski_values_predictionsrowvptokensrL   propositions                          r%   postprocesszFillMaskPipeline.postprocess   s   !j&6&6q&9E&A$$Q'E!+.q1	)>>T!88I1M1M$MNTTVWXZ[W[\LmmoGQa/0F"63E%RZZq%9:;M;MbRS;TUua077==%=0D"&++"3"3"5t||7I7I7KKF ==dnn6R6R)R]bckklnoL Qa/0FNNrN*E%c:o."'**U"3FKll1o**3CI[I[I]4^*_ 	&A&CG\2 (1"*//1)"1,,.A*+|A'4>>3N3N)N OP  >>00[0Y()ADNNDYDY[\Z]D^ltu

;'( MM##	$ !9r'   c           	      \   t        |t              r|g}	 | j                  j                         }g }|D ]  }|j                  |d       }||| j                  |ddddd      d   }t        |      dk(  rt        j                  d| d       Y|d   }t        j                  d| d	| j                  j                  |       d
       |j                  |        t        t        |            }t        |      dk(  rt        d      t        j                  |      }|S # t        $ r i }Y w xY w)NFr   T)add_special_tokensreturn_attention_maskreturn_token_type_ids
max_length
truncationr   r   zThe specified target token `zd` does not exist in the model vocabulary. We cannot replace it with anything meaningful, ignoring itz:` does not exist in the model vocabulary. Replacing with `z`.z1At least one target must be provided when passed.)r4   strr   	get_vocab	Exceptiongetlenloggerwarningconvert_ids_to_tokensr]   r5   setr"   r*   array)r#   targetsrR   vocabr^   targetid_r   s           r%   get_target_idszFillMaskPipeline.get_target_ids   sZ   gs#iG	NN,,.E 
 	#F))FD)C{ NN',*/*/ # +  	 y>Q&NN6vh ?U U l
 26( ;''+~~'K'KC'P&QQSU c"5	#6 #j/*
z?aPQQXXj)
E  	E	s   D D+*D+c                     i }|||d<   i }|| j                  ||      }||d<   |||d<   | j                  j                  !t        d| j                  j
                  d      |i |fS )Nr;   r^   rR   r)   z-The tokenizer does not define a `mask_token`.)r   r   r   r   r-   r.   )r#   rR   r}   r;   preprocess_paramspostprocess_paramsr^   s          r%   _sanitize_parametersz%FillMaskPipeline._sanitize_parameters   s    '4D01,,We<J/9|,*/w'>>''/#TZZ99;j  !"&888r'   r:   kwargsc                      y Nr?   r#   r:   r   s      r%   __call__zFillMaskPipeline.__call__   s    LOr'   c                      y r   r?   r   s      r%   r   zFillMaskPipeline.__call__   s    X[r'   c                 n    t        |   |fi |}t        |t              rt	        |      dk(  r|d   S |S )a  
        Fill the masked token in the text(s) given as inputs.

        Args:
            inputs (`str` or `list[str]`):
                One or several texts (or one list of prompts) with masked tokens.
            targets (`str` or `list[str]`, *optional*):
                When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                vocab. If the provided targets are not in the model vocab, they will be tokenized and the first
                resulting token will be used (with a warning, and that might be slower).
            top_k (`int`, *optional*):
                When passed, overrides the number of predictions to return.

        Return:
            A list or a list of list of `dict`: Each result comes as list of dictionaries with the following keys:

            - **sequence** (`str`) -- The corresponding input with the mask token prediction.
            - **score** (`float`) -- The corresponding probability.
            - **token** (`int`) -- The predicted token id (to replace the masked one).
            - **token_str** (`str`) -- The predicted token (to replace the masked one).
        r   r   )superr   r4   r5   rw   )r#   r:   r   r_   	__class__s       r%   r   zFillMaskPipeline.__call__  s=    0 '"64V4fd#Fq(81:r'   )NN)   Nr   )NNN)__name__
__module____qualname___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r*   ndarrayr&   r1   r7   dictrs   r=   rA   rl   r   r   r   r   r5   r   r   __classcell__)r   s   @r%   r   r      sF    O!#O2h- BJJ  "** ?- ? =A
	c= 	!

5n'R9* OsOcOd4S>6JO O[tCy[C[Dd3PS8nAU<V[ [CcN+7:	tDcN#T$tCH~*>%??	@ r'   r   )typingr   r   r   r   r*   utilsr   r   r	   r
   baser   r   r   r   
tensorflowr   tf_utilsr   r    
get_loggerr   rx   r   r?   r'   r%   <module>r      sx    ' '  T T V V )  
		H	% 40Y|x ||r'   