
    rh#                    p    d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 g dZ
 G d de      Z G d d	e      Zy
)    )annotations)ABCabstractmethod)Iterable)AutoTokenizerPreTrainedTokenizerBase(  !"z''z``#$%&'()*+,-./:;<=>?@[\]^_`{|}~aaboutaboveacrossafter
afterwardsagainagainstainallalmostalonealongalreadyalsoalthoughalwaysamamongamongstamoungstamountanandanotheranyanyhowanyoneanythinganywayanywherearearenaroundasatbackbebecamebecausebecomebecomesbecomingbeenbefore
beforehandbehindbeingbelowbesidebesidesbetweenbeyondbillbothbottombutbycallcancannotcantcoconcouldcouldncouldntcryddedescribedetaildiddidndodoesdoesndoingdondonedowndueduringeachegeighteitherelevenelse	elsewhereemptyenoughetcevenevereveryeveryone
everything
everywhereexceptfewfifteenfiftyfillfindfirefirstfiveforformerformerlyfortyfoundfourfromfrontfullfurthergetgivegohadhadnhashasnhasnthavehavenhavinghehenceherhere	hereafterherebyhereinhereuponhersherselfhimhimselfhishowhoweverhundrediieifinincindeedinterestintoisisnititsitselfjustkeeplastlatterlatterlyleastlessllltdmmamademanymayme	meanwhilemightmightnmillminemoremoreovermostmostlymovemuchmustmustnmymyselfnamenamelyneednneitherneverneverthelessnextninenonobodynonenoonenornotnothingnownowhereoofoffoftenononceoneonlyontoorotherothers	otherwiseourours	ourselvesoutoverownpartperperhapspleaseputratherressameseeseemseemedseemingseemsseriousseveralshansheshouldshouldnshowsidesincesinceresixsixtysosomesomehowsomeone	somethingsometime	sometimes	somewherestillsuchsystemttaketenthanthatthetheirtheirsthem
themselvesthenthencethere
thereafterthereby	thereforetherein	thereuponthesetheythickthinthirdthisthosethoughthreethrough
throughoutthruthustotogethertootoptowardtowardstwelvetwentytwoununderuntilupuponusveveryviawaswasnwewellwerewerenwhatwhateverwhenwhencewheneverwhere
whereafterwhereaswherebywherein	whereuponwhereverwhetherwhichwhilewhitherwhowhoeverwholewhomwhosewhywillwithwithinwithoutwonwouldwouldnyyetyouyouryoursyourself
yourselvesc                  p    e Zd Zedd       Zedd       Zedd       Zed	d       Zeed
d              Z	y)WordTokenizerc                     y N selfvocabs     /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/sentence_transformers/models/tokenizer/WordTokenizer.py	set_vocabzWordTokenizer.set_vocab          c                     y r  r  r  s     r  	get_vocabzWordTokenizer.get_vocab  r  r  c                     y r  r  )r  textkwargss      r  tokenizezWordTokenizer.tokenize  r  r  c                     y r  r  r  output_paths     r  savezWordTokenizer.save  r  r  c                     y r  r  
input_paths    r  loadzWordTokenizer.load  s     	r  Nr  zIterable[str])r  strreturnz	list[int]r  r  r  r  )
__name__
__module____qualname__r   r  r  r  r  staticmethodr  r  r  r  r  r    sl              r  r  c                  R     e Zd Zd fdZddZd	dZd	dZd
dZedd       Z	 xZ
S )TransformersTokenizerWrapperc                0    t         |           || _        y r  )super__init__	tokenizer)r  r  	__class__s     r  r  z%TransformersTokenizerWrapper.__init__  s    "r  c                4    | j                  |      }|d   d   S )N	input_idsr   )r  )r  sentenceencodeds      r  r  z%TransformersTokenizerWrapper.tokenize  s     ..*{#A&&r  c                     y r  r  r  s     r  r  z&TransformersTokenizerWrapper.set_vocab  s    r  c                6    | j                   j                         S r  )r  r  r  s     r  r  z&TransformersTokenizerWrapper.get_vocab  s    ~~''))r  c                :    | j                   j                  |       y r  )r  save_pretrainedr  s     r  r  z!TransformersTokenizerWrapper.save  s    &&{3r  c                B    t        t        j                  | d            S )NT)use_fast)r  r   from_pretrainedr  s    r  r  z!TransformersTokenizerWrapper.load  s    +M,I,I*_c,deer  )r  r   )r  r  r  r  r  )r  r  r  r  r  r  r  r  r  r  __classcell__)r  s   @r  r  r    s2    #'*4 f fr  r  N)
__future__r   abcr   r   collections.abcr   transformersr   r   ENGLISH_STOP_WORDSr  r  r  r  r  <module>r     s5    " # $ ?C LC .f= fr  