
    rh"                     T    d Z ddlZej                  dk\  rddlZnddlZ G d d      Zy)z"English Normalizer class for CLVP.    N)      c                       e Zd Zd ZdedefdZdedefdZdedefdZdedefd	Z	dedefd
Z
dedefdZdedefdZdedefdZdedefdZdedefdZd Zy)EnglishNormalizerc                     dD cg c]1  }t        j                  d|d   z  t         j                        |d   f3 c}| _        g d| _        g d| _        g d| _        y c c}w )N))mrsmisess)mrmister)drdoctor)stsaint)cocompany)jrjunior)majmajor)gengeneral)drsdoctors)revreverend)lt
lieutenant)hon	honorable)sgtsergeant)captcaptain)esqesquire)ltdlimited)colcolonel)ftfortz\b%s\.r      )
 onetwothreefourfivesixseveneightnine)
teneleventwelvethirteenfourteenfifteensixteen	seventeeneighteennineteen)
r-   r-   twentythirtyfortyfiftysixtyseventyeightyninety)recompile
IGNORECASE_abbreviationsonesteenstens)selfxs     }/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/clvp/number_normalizer.py__init__zEnglishNormalizer.__init__   sc    
 ZZ
QqT)2==91Q4@
0 a	

 k	K
s   6Anumreturnc                    |dk(  ry|dk  rd| j                  t        |            z   S |dk  r| j                  |   S |dk  r| j                  |dz
     S |dk  r6| j                  |dz     |dz  dk7  rd| j                  |dz        z   z   S dz   S |d	k  r9| j                  |dz     d
z   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |d	z        dz   |d	z  dk7  rd| j                  |d	z        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S y)ax  
        Converts numbers(`int`) to words(`str`).

        Please note that it only supports upto - "'nine hundred ninety-nine quadrillion, nine hundred ninety-nine
        trillion, nine hundred ninety-nine billion, nine hundred ninety-nine million, nine hundred ninety-nine
        thousand, nine hundred ninety-nine'" or `number_to_words(999_999_999_999_999_999)`.
        r   zerozminus 
      d   -r-      hundred i@B z	 thousand, i ʚ;z millionl    J)z billionl     I5 z	 trillionl     NZoz quadrillionznumber out of range)number_to_wordsabsrM   rN   rO   )rP   rT   s     rR   r`   z!EnglishNormalizer.number_to_wordsF   s	    !81Wd223s8<<<2X99S>!2X::cBh''3Y99SBY'SVY[S[_`S`31E1EcBh1O+Oiifhii4Z		#*%
2_beh_hlm_mcD<P<PQTWZQZ<[6[vsuv 9_$$SD[1>ADjAo4$..sTz::W TVW
 = $$SI%56CF?VWCW4$..sY??a ^`a
 $$$$SM%9:GJ]GZ^_G_4$..s]/BCCi fhi
 (($$S,=%=>KNQbKbfgKg4$..s5F/FGGq npq
 ,,$$S,A%AB ! 22a7 4//6K0KLL  )    textc                 D    |j                  dd      j                  d      S )z+
        Converts unicode to ascii
        asciiignorezutf-8)encodedecoderP   rc   s     rR   convert_to_asciiz"EnglishNormalizer.convert_to_ascii   s      {{7H-44W==rb   mc                 l   |j                  d      }|j                  d      }t        |      dkD  r|dz   S |d   rt        |d         nd}t        |      dkD  r|d   rt        |d         nd}|r!|r|dk(  rdnd}|dk(  rdnd	}|d
|d|d
|S |r|dk(  rdnd}|d
|S |r|dk(  rdnd	}|d
|S y)zZ
        This method is used to expand numerical dollar values into spoken words.
        r,   .   z dollarsr   dollardollarscentcentsr^   r_   zzero dollars)groupsplitlenint)rP   rk   matchpartsrp   rr   dollar_unit	cent_units           rR   _expand_dollarsz!EnglishNormalizer._expand_dollars   s     
C u:>:%%#(8#eAh-!$UaE!HE!H!u&-l(	K"'1*'I%,k5)LL&-l(	K%{33"'1*'I#Y//!rb   c                 D    |j                  d      j                  dd      S )zF
        This method is used to remove commas from sentences.
        r,   ,r-   rs   replacerP   rk   s     rR   _remove_commasz EnglishNormalizer._remove_commas   s     wwqz!!#r**rb   c                 D    |j                  d      j                  dd      S )zO
        This method is used to expand '.' into spoken word ' point '.
        r,   rm   z point r~   r   s     rR   _expand_decimal_pointz'EnglishNormalizer._expand_decimal_point   s     wwqz!!#y11rb   c                     dddd}t        |j                  d      dd       }d|d	z  k  r|d	z  d
k  rd}n|j                  |dz  d      }| j                  |      |z   S )z`
        This method is used to expand ordinals such as '1st', '2nd' into spoken words.
        r   ndrd)r,   rn   r   r   NrX   rZ   rY   th)rv   rs   getr`   )rP   rT   ordinal_suffixessuffixs       rR   _expand_ordinalz!EnglishNormalizer._expand_ordinal   sp      $6#))A,s#$s?sSyBF%))#(D9F##C(611rb   c                    t        |j                  d            }|dkD  r\|dk  rW|dk(  ry|dkD  r|dk  rd| j                  |dz        z   S |dz  dk(  r| j                  |dz        d	z   S | j                  |      S | j                  |      S )
a  
        This method acts as a preprocessing step for numbers between 1000 and 3000 (same as the original repository,
        link :
        https://github.com/neonbjb/tortoise-tts/blob/4003544b6ff4b68c09856e04d3eff9da26d023c2/tortoise/utils/tokenizer.py#L86)
        r   r\   i  i  ztwo thousandi  ztwo thousand rZ   r]   )rv   rs   r`   )rP   rk   rT   s      rR   _expand_numberz EnglishNormalizer._expand_number   s     !''!*o:#*d{%td
&)=)=cCi)HHHsa++C3J7*DD++C00'',,rb   c                 ~   t        j                  d| j                  |      }t        j                  dd|      }t        j                  d| j                  |      }t        j                  d| j                  |      }t        j                  d| j
                  |      }t        j                  d| j                  |      }|S )z
        This method is used to normalize numbers within a text such as converting the numbers to words, removing
        commas, etc.
        z([0-9][0-9,]+[0-9])u   £([0-9,]*[0-9])z	\1 poundsz\$([0-9.,]*[0-9])z([0-9]++\.[0-9]+)z[0-9]++(st|nd|rd|th)z[0-9]+)rI   subr   r{   r   r   r   ri   s     rR   normalize_numbersz#EnglishNormalizer.normalize_numbers   s    
 vv,d.A.A4Hvv)<>vv*D,@,@$Gvv*D,F,FMvv-t/C/CTJvvi!4!4d;rb   c                 \    | j                   D ]  \  }}t        j                  |||      } |S )z/
        Expands the abbreviate words.
        )rL   rI   r   )rP   rc   regexreplacements       rR   expand_abbreviationsz&EnglishNormalizer.expand_abbreviations   s5     #'"5"5 	4E;66%d3D	4rb   c                 V    t        j                  t        j                  d      d|      S )z.
        Removes multiple whitespaces
        z\s+r^   )rI   r   rJ   ri   s     rR   collapse_whitespacez%EnglishNormalizer.collapse_whitespace   s      vvbjj(#t44rb   c                     | j                  |      }|j                         }| j                  |      }| j                  |      }| j	                  |      }|j                  dd      }|S )z
        Converts text to ascii, numbers / number-like quantities to their spelt-out counterparts and expands
        abbreviations
        "r-   )rj   lowerr   r   r   r   ri   s     rR   __call__zEnglishNormalizer.__call__   sd     $$T*zz|%%d+((.''-||C$rb   N)__name__
__module____qualname__rS   rv   strr`   rj   r{   r   r   r   r   r   r   r   r    rb   rR   r   r      s    'kR9)3 9)3 9)v>S >S >" " "0+ + +2s 2s 223 23 2- - -(c c   5 5 5rb   r   )__doc__sysversion_inforI   r   r   r   rb   rR   <module>r      s/     ) 
 wX Xrb   