
    rhZ                        d dl Z d dlmZmZ d dlZd dlmZ d dlmc mZ	 ddl
mZ ddlmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZmZmZmZm Z  ddl!m"Z"  ejF                  e$      Z% G d de      Z& G d de      Z' G d dejP                        Z) G d de      Z* G d de      Z+ G d de      Z, G d dejP                        Z- G d dejP                        Z. G d dejP                        Z/ G d  d!ee       Z0eZ1 G d" d#e      Z2 G d$ d%e      Z3 G d& d'e      Z4 G d( d)e      Z5 G d* d+e      Z6g d,Z7y)-    N)OptionalUnion   )is_deepspeed_zero3_enabled)is_fsdp_managed_module)GradientCheckpointingLayer)BaseModelOutputWav2Vec2BaseModelOutput)PreTrainedModel)logging   )	Wav2Vec2FeatureProjectionWav2Vec2FeedForward#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PositionalConvEmbeddingWav2Vec2PreTrainedModel   )WavLMConfigc                       e Zd Zy)WavLMPositionalConvEmbeddingN__name__
__module____qualname__     z/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/wavlm/modular_wavlm.pyr   r          r    r   c                       e Zd Zy)WavLMFeatureProjectionNr   r   r    r!   r$   r$   #   r"   r    r$   c                       e Zd ZdZ	 	 	 	 ddedededededef fdZ	 	 	 	 dd	ej                  d
e
ej                     de
ej                     dedeej                  e
ej                     e
eej                        f   f
dZd	ej                  d
eej                  ej                   f   dej                  dedej                  ej                  ff
dZdededej                  fdZdej                  dej                  fdZ xZS )WavLMAttentionz=Multi-headed attention from 'Attention Is All You Need' paper	embed_dim	num_headsdropoutnum_bucketsmax_distancehas_relative_position_biasc                    t         |           || _        || _        || _        ||z  | _        | j
                  |z  | j                  k7  rt        d| j                   d| d      | j
                  dz  | _        t        j                  ||      | _
        t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        || _        || _        t        j                   t#        j$                  d| j                  dd            | _        t        j                  | j
                  d      | _        |r0t        j*                  | j                  | j                        | _        y y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      r      )super__init__r'   r(   r)   head_dim
ValueErrorscalingnnLineark_projv_projq_projout_projr*   r+   	Parametertorchonesgru_rel_pos_constgru_rel_pos_linear	Embeddingrel_attn_embed)selfr'   r(   r)   r*   r+   r,   	__class__s          r!   r0   zWavLMAttention.__init__*   s7    	""!Y.MMI%$..8MdnnM]$YKr3  }}d*ii	95ii	95ii	95		)Y7&(!#ejjDNNAq.Q!R"$))DMM1"=%"$,,t/?/?"PD &r    hidden_statesattention_maskposition_biasoutput_attentionsreturnc                     |j                         \  }}}|S| j                  ||      }|j                  d      j                  |ddd      j	                  || j
                  z  ||      }|j	                  |j                  dd | j
                  dfz         }	|	j                  dddd      }	| j                  |	      }
|
j	                  |	j                  dd dz         j                  d      }
t        j                  |
      j                  dd      \  }}||| j                  z  d	z
  z  d
z   }|j	                  || j
                  z  dd      |z  }|j	                  d||f      }| j                  ||||      \  }}|||fS )z'Attention layer with relative attentionNr   r   r   r   )r      dim      ?g       @)sizecompute_bias	unsqueezerepeatviewr(   shapepermuter>   sumr;   sigmoidchunkr=   torch_multi_head_self_attention)rA   rC   rD   rE   rF   indexbsztgt_len_gated_hidden_statesrelative_position_projgate_agate_bgate_outputgated_position_biasattn_outputattn_weightss                    r!   forwardzWavLMAttention.forwardN   s    (,,.Wa   --gw?M''*11#q!Q?DDS4>>EY[bdkl  ,001D1DSb1IT^^]_L`1`a199!Q1E "&!8!89L!M!7!<!<=P=V=VWZXZ=[^d=d!e!i!ijl!m '=>DDQBDO)?)? ?# EFL *..sT^^/CRKm[166GW7MN$($H$H>+>@Q%
!\ L-77r    rb   c                 X   |j                  dd      x}x}}||j                  d      nd}dx}	}
d}t        j                  |||| j                  | j
                  t        j                  dg      t        j                  | j                  j                  | j                  j                  | j                  j                  f      |	|
|| j                  | j                  j                  | j                  j                  | j                   |||d| j                  j                  | j                  j                  | j                  j                        \  }}|j                  dd      }|C|dddf   j#                  |j$                  dd | j
                  fz   |j$                  dd z         }||fS )zCsimple wrapper around torch's multi_head_attention_forward functionr   r   NFT)use_separate_proj_weightq_proj_weightk_proj_weightv_proj_weight)	transposeneFmulti_head_attention_forwardr'   r(   r;   emptycatr8   biasr6   r7   r)   r9   weighttrainingbroadcast_torS   )rA   rC   rD   rb   rF   querykeyvaluekey_padding_maskbias_kbias_vadd_zero_attnrc   rd   s                 r!   rX   z.WavLMAttention.torch_multi_head_self_attentionw   s    ,55a;;;e3A3M>,,Q/SW  %&$B$BNNNNKKIIt{{'')9)94;;;K;KLMLLMM  MMMM%)++,,++,,++,,+%
!\2 "++Aq1# (40==""2A&$..)::\=O=OPQPR=SSL L((r    query_length
key_lengthc                    t        j                  |t         j                        d d d f   }t        j                  |t         j                        d d d f   }||z
  }| j                  |      }|j	                  | j
                  j                  j                        }| j                  |      }|j                  g d      }|S )N)dtype)r   r   r   )	r;   arangelong_relative_positions_buckettor@   rr   devicerT   )rA   r|   r}   context_positionmemory_positionrelative_positionrelative_position_bucketvaluess           r!   rO   zWavLMAttention.compute_bias   s     <<EJJG4P,,zDT1WM+.>>#'#B#BCT#U #;#>#>t?R?R?Y?Y?`?`#a $$%=>	*r    relative_positionsc                 $   | j                   dz  }|dkD  j                  t        j                        |z  }t        j                  |      }|dz  }||k  }t        j
                  |j                         |z        }|t        j
                  | j                  |z        z  }|||z
  z  }||z   j                  t        j                        }t        j                  |t        j                  ||dz
              }|t        j                  |||      z  }|S )Nr   r   r   )r*   r   r;   r   abslogfloatmathr+   min	full_likewhere)rA   r   r*   relative_buckets	max_exactis_smallrelative_positions_if_largerelative_position_if_larges           r!   r   z)WavLMAttention._relative_positions_bucket   s   &&!+.266uzzB[P"YY'9:1$	%	1&+ii0B0H0H0JY0V&W#&ADHHTM^M^ajMjDk&k#&A[S\E\&]#&/2M&M%Q%QRWR\R\%]"%*YY&8RT_bcTc(d&
" 	EKK2DF`aar    )        i@  i   TNNFr   )r   r   r   __doc__intr   boolr0   r;   Tensorr   tuplere   FloatTensorr   
LongTensor
BoolTensorrX   rO   r   __classcell__rB   s   @r!   r&   r&   '   s   G +/"Q"Q "Q 	"Q
 "Q "Q %)"QN 2604"''8||'8 !.'8  -	'8
  '8 
u||Xell3XeELL>Q5RR	S'8R5)((5) e..0@0@@A5) #..	5)
  5) 

U..	/5)n # %BSBS  U=N=N  SXSdSd  r    r&   c                       e Zd Zy)WavLMFeedForwardNr   r   r    r!   r   r      r"   r    r   c                   2     e Zd Zddedef fdZddZ xZS )WavLMEncoderLayerconfigr,   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y N)r'   r(   r)   r*   r+   r,   epsr/   r0   r&   hidden_sizenum_attention_headsattention_dropoutr*   max_bucket_distance	attentionr4   Dropouthidden_dropoutr)   	LayerNormlayer_norm_eps
layer_normr   feed_forwardfinal_layer_normrA   r   r,   rB   s      r!   r0   zWavLMEncoderLayer.__init__       '((00,,**33'A
 zz&"7"78,,v'9'9v?T?TU,V4 "V-?-?VEZEZ [r    c                     |}| j                  |||||      \  }}}| j                  |      }||z   }| j                  |      }|| j                  |      z   }| j	                  |      }||f}|r||fz  }|S )NrD   rE   rF   rY   )r   r)   r   r   r   )	rA   rC   rD   rE   rF   rY   attn_residualrd   outputss	            r!   re   zWavLMEncoderLayer.forward   s    %59^^)'/ 6D 6
2|] ]3%56%(9(9-(HH--m< -0&Gr    Tr   r   r   r   r   r   r0   re   r   r   s   @r!   r   r      s    \{ \ \r    r   c                   2     e Zd Zddedef fdZddZ xZS ) WavLMEncoderLayerStableLayerNormr   r,   c                    t         |           t        |j                  |j                  |j
                  |j                  |j                  |      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        t!        |      | _        t        j                  |j                  |j                        | _        y r   r   r   s      r!   r0   z)WavLMEncoderLayerStableLayerNorm.__init__   r   r    c                     |}| j                  |      }| j                  ||||      \  }}}| j                  |      }||z   }|| j                  | j	                  |            z   }||f}|r||fz  }|S )N)rD   rE   rF   )r   r   r)   r   r   )rA   rC   rD   rE   rF   r   rd   r   s           r!   re   z(WavLMEncoderLayerStableLayerNorm.forward
  s    %659^^)'/	 6D 6
2|] ]3%5%(9(9$:O:OP]:^(__ -0&Gr    r   )NNFr   r   s   @r!   r   r      s    \{ \ \r    r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w Nr   r   )r,   F)r/   r0   r   r   pos_conv_embedr4   r   r   r   r   r   r   r)   
ModuleListrangenum_hidden_layersr   layersgradient_checkpointingrA   r   irB   s      r!   r0   zWavLMEncoder.__init__   s    :6B,,v'9'9v?T?TUzz&"7"78mmUZ[a[s[sUtuPQv16Ku
 ',# v   !Cc                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }| j                  |      }t               xs t        |       }
d }t        | j                        D ]y  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
r ||||||      }|d d \  }}|rd}|sq|d   fz   }{ |r||fz   }|st        d |||fD              S t!        |||	      S )
Nr   rI   r   r   r   r   NNNc              3   &   K   | ]	  }||  y wNr   .0vs     r!   	<genexpr>z'WavLMEncoder.forward.<locals>.<genexpr>a       mq_`_lm   last_hidden_staterC   
attentions)rP   rQ   rS   r   r   r)   r   r   	enumerater   r;   randrs   r   	layerdropr   r	   rA   rC   rD   rF   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusrE   r   layerdropout_probabilityskip_the_layerlayer_outputss                    r!   re   zWavLMEncoder.forward+  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;6]302R6LT6R!$++. 	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![ %!#1"/&7! 0=Ra/@,} 2 &9]1=M<O&O#1	P4   1]4D Dm]4EGZ$[mmm++*
 	
r    NFFTr   r   r   r0   re   r   r   s   @r!   r   r     s    	, ";
r    r   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )WavLMEncoderStableLayerNormc           
         t         |           || _        t        |      | _        t        j                  |j                  |j                        | _	        t        j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        ||dk(         c}      | _        d| _        y c c}w r   )r/   r0   r   r   r   r4   r   r   r   r   r   r   r)   r   r   r   r   r   r   r   s      r!   r0   z$WavLMEncoderStableLayerNorm.__init__j  s    :6B,,v'9'9v?T?TUzz&"7"78mm v778 1UVZ[U[]
 ',#r   c                    |rdnd }|rdnd }|5|j                  d      j                  dd|j                  d         }d|| <   | j                  |      }	||	z   }| j	                  |      }t               xs t        |       }
d }t        | j                        D ]x  \  }}|r||fz   }t        j                  g       }| j                  xr  |dkD  xr || j                  j                  k  }|r|
r |||||      }|d d \  }}|rd}|sp|d   fz   }z | j                  |      }|r||fz   }|st        d |||fD              S t!        |||	      S )
Nr   rI   r   r   r   )rD   rF   rE   r   c              3   &   K   | ]	  }||  y wr   r   r   s     r!   r   z6WavLMEncoderStableLayerNorm.forward.<locals>.<genexpr>  r   r   r   )rP   rQ   rS   r   r)   r   r   r   r   r;   r   rs   r   r   r   r   r	   r   s                    r!   re   z#WavLMEncoderStableLayerNorm.forwardx  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M001"11-@%(;;]302R6LT6R!$++. 	PHAu#$58H$H! #(**R.!]]fq1uf:MPTP[P[PePe:eN![ !&!#1&7"/	! 0=Ra/@,} 2 &9]1=M<O&O#/	P2 6 1]4D Dm]4EGZ$[mmm+;LYl
 	
r    r   r   r   s   @r!   r   r   i  s    ," "9
r    r   c                   8     e Zd ZdZ fdZed        Zd Z xZS )WavLMGumbelVectorQuantizerz
    Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
    GUMBEL-SOFTMAX](https://huggingface.co/papers/1611.01144) for more information.
    c                 0   t         |           |j                  | _        |j                  | _        |j                  | j                  z  dk7  r&t        d|j                   d| j                   d      t        j                  t        j                  d| j                  | j
                  z  |j                  | j                  z              | _        t        j                  |j                  d   | j                  | j
                  z        | _        d| _        y )Nr   z`config.codevector_dim z5 must be divisible by `config.num_codevector_groups` z for concatenation.r   rI   r   )r/   r0   num_codevector_groups
num_groupsnum_codevectors_per_groupnum_varscodevector_dimr2   r4   r:   r;   r   codevectorsr5   conv_dimweight_projtemperature)rA   r   rB   s     r!   r0   z#WavLMGumbelVectorQuantizer.__init__  s     6688  4??2a7)&*?*?)@ A66:oo5F G%%  <<a4==!@&BWBW[_[j[jBjk
 99V__R%8$//DMM:YZ r    c           	          | j                  d      }t        j                  t        j                  |t        j                  |dz         z  d             j                         }|S )Nr   rK   gHz>rI   )meanr;   exprU   r   )probsmarginal_probs
perplexitys      r!   _compute_perplexityz.WavLMGumbelVectorQuantizer._compute_perplexity  sR    *YY		.599^VZEZ;[*[ac ddeiik
r    c                    |j                   \  }}}| j                  |      }|j                  ||z  | j                  z  d      }| j                  rt
        j                  j                  |j                         | j                  d      }|j                  |      }t        j                  |j                  ||z  | j                  d      j                         d      }| j                  |      }n}|j                  d      } |j                  |j                    j!                  d|j                  dd      d      }|j                  ||z  | j                  d      }| j                  |      }|j                  ||z  d      }|j#                  d      | j$                  z  }	|	j                  ||z  | j                  | j&                  d      }
|
j)                  d      j                  ||d      }
|
|fS )NrI   T)tauhardrK   r   rM   )rS   r   rR   r   rs   r4   
functionalgumbel_softmaxr   r  type_asr;   softmaxr  argmax	new_zerosscatter_rP   r   r   rU   )rA   rC   
batch_sizesequence_lengthr   codevector_probscodevector_soft_distr  codevector_idxcodevectors_per_groupr   s              r!   re   z"WavLMGumbelVectorQuantizer.forward  s   3@3F3F0
O[ ((7%**:+G$//+Y[]^==!}};;M<O<O<QW[WgWgnr;s/77F $)=="":#?RTU[[]ce$  112FGJ +11b19N6}668K8KLUUN''A.   044Z/5QSWSbSbdfg112BCJ+00o1MrR 0 : :2 >AQAQ Q+00o1Mt`d`m`moqr!oob)..z?BOJ&&r    )	r   r   r   r   r0   staticmethodr  re   r   r   s   @r!   r   r     s&    
*  
"'r    r   c                   H    e Zd ZU eed<   dZdZdZdZdZ	dZ
d Zd Zd Zd	 Zy
)WavLMPreTrainedModelr   wavlminput_valuesTFc           
      z   t        |t              r|j                  j                  j                  j                  dd       |j                  j                  j                  j                          t        j                  j                  |j                         yt        |t              rt        j                  j                  |j                  j                  ddt        j                  d|j                  j                   d   |j                  j"                  z  z        z         t        j                  j%                  |j                  j                  d       yt        |t&              rt        j                  d|j(                  j*                  z        }t        j                  j                  |j(                  j                  | |       t        j                  j                  |j(                  j                  | |       yt        |t        j,                        rm|j                  j                  j                  d| j.                  j0                         |j                  %|j                  j                  j                          yyt        |t        j2                  t        j4                  f      rJ|j                  j                  j                          |j                  j                  j7                  d       yt        |t        j8                        rt        j                  j;                  |j                         |j                  jt        j                  |j<                  |j"                  |j                   d   z  z        }t        j                  j                  |j                  | |       yyy)	zInitialize the weightsr   r   )r  stdr   r   )abNrM   )
isinstancer   r   rr   datanormal_rq   zero_r4   inituniform_r   r   convr   sqrtkernel_sizein_channels	constant_r$   
projectionin_featuresr5   r   initializer_ranger   	GroupNormfill_Conv1dkaiming_normal_groups)rA   moduleks      r!   _init_weightsz"WavLMPreTrainedModel._init_weights  s    f89%%**222C##((..0GGV//0 <=GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2 67		!f//;;;<AGGV..55!qAGGV..33rQ?		*MM&&CT[[5R5R&S{{&  &&( 'r|| <=KK""$MM$$S)		*GG##FMM2{{&IIfmmv/A/AFDVDVWXDY/YZ[  a 8 ' +r    c                     t        d      NzNot needed for WavLMAttributeErrorrA   s    r!   _get_adaptersz"WavLMPreTrainedModel._get_adapters$      344r    c                     t        d      r:  r;  r=  s    r!   init_adapter_layersz(WavLMPreTrainedModel.init_adapter_layers'  r?  r    c                     t        d      r:  r;  r=  s    r!   load_adapterz!WavLMPreTrainedModel.load_adapter*  r?  r    N)r   r   r   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr8  r>  rA  rC  r   r    r!   r  r    s?    $O&*# N9B555r    r  c                       e Zd Zy)
WavLMModelNr   r   r    r!   rL  rL  1  r"   r    rL  c                       e Zd Zy)WavLMForCTCNr   r   r    r!   rN  rN  5  r"   r    rN  c                       e Zd Zy)WavLMForSequenceClassificationNr   r   r    r!   rP  rP  9  r"   r    rP  c                       e Zd Zy) WavLMForAudioFrameClassificationNr   r   r    r!   rR  rR  =  r"   r    rR  c                       e Zd Zy)WavLMForXVectorNr   r   r    r!   rT  rT  A  r"   r    rT  )rR  rN  rP  rT  rL  r  )8r   typingr   r   r;   torch.nnr4   torch.nn.functionalr  rm   integrations.deepspeedr   integrations.fsdpr   modeling_layersr   modeling_outputsr	   r
   modeling_utilsr   utilsr   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   configuration_wavlmr   
get_loggerr   loggerr   r$   Moduler&   r   r   r   r   r   r   r  WavLMBaseModelOutputrL  rN  rP  rR  rT  __all__r   r    r!   <module>re     s>    "     @ 7 9 H - 
 
 
 - 
		H	%	#B 		6 	c RYY c L	* 	&2 &R"'A "JG
299 G
TH
")) H
VC' C'L15?,C 15h / 	 		. 		%F 		'J 		( 	r    