
    rhA
                     0    d Z ddlmZmZmZ  G d d      Zy)z Tokenization utils for RoFormer.    )NormalizedStringPreTokenizedStringnormalizersc                   :    e Zd Zd	dZdededee   fdZdefdZ	y)
JiebaPreTokenizerreturnNc                     || _         t        j                  dddd      | _        	 dd l}|| _        y # t        $ r t	        d      w xY w)NFT)
clean_texthandle_chinese_charsstrip_accents	lowercaser   zkYou need to install rjieba to use RoFormerTokenizer. See https://pypi.org/project/rjieba/ for installation.)vocabr   BertNormalizerrjiebaImportErrorjieba)selfr   r   s      /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/roformer/tokenization_utils.py__init__zJiebaPreTokenizer.__init__   s]    
&55!%	
	 
  	I 	s	   3 Ainormalized_stringc                 \   g }| j                   j                  t        |      d      D ]  \  }}}|| j                  v r|j	                  |||        *| j
                  j                  |      j                         }|D ])  }|s|t        |      z   }|j	                  |||        |}+  |S )NF)hmm)	r   tokenizestrr   appendr   normalize_strsplitlen)r   r   r   splitstokenstartend
token_lists           r   jieba_splitzJiebaPreTokenizer.jieba_split&   s     "&!4!4S9J5KQV!4!W 		$E5#

"/c:;!--;;EBHHJ
' $E#c%j0&7c&BC #	$		$*     pretokc                 :    |j                  | j                         y )N)r   r%   )r   r'   s     r   pre_tokenizezJiebaPreTokenizer.pre_tokenizeA   s    T%%&r&   )r   N)
__name__
__module____qualname__r   intr   listr%   r   r)    r&   r   r   r      s5    "S 5E $O_J` 6'#5 'r&   r   N)__doc__
tokenizersr   r   r   r   r/   r&   r   <module>r2      s    ' H H.' .'r&   