
"""PyTorch Data2VecAudio model."""

import math
import warnings
from typing import Callable, Optional, Union

import numpy as np
import torch
from torch import nn
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...integrations.deepspeed import is_deepspeed_zero3_enabled
from ...integrations.fsdp import is_fsdp_managed_module
from ...modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import (
    BaseModelOutput,
    CausalLMOutput,
    SequenceClassifierOutput,
    TokenClassifierOutput,
    Wav2Vec2BaseModelOutput,
    XVectorOutput,
)
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from ...processing_utils import Unpack
from ...utils import auto_docstring, is_peft_available, is_torch_flex_attn_available
from .configuration_data2vec_audio import Data2VecAudioConfig


if is_torch_flex_attn_available():
    from ...integrations.flex_attention import make_flex_block_causal_mask


class Data2VecAudioConvLayer(nn.Module):
    def __init__(self, config, layer_id=0):
        super().__init__()
        self.in_conv_dim = config.conv_dim[layer_id - 1] if layer_id > 0 else 1
        self.out_conv_dim = config.conv_dim[layer_id]

        self.conv = nn.Conv1d(
            self.in_conv_dim,
            self.out_conv_dim,
            kernel_size=config.conv_kernel[layer_id],
            stride=config.conv_stride[layer_id],
            bias=config.conv_bias,
        )
        self.layer_norm = nn.LayerNorm(self.out_conv_dim, elementwise_affine=True)
        self.activation = ACT2FN[config.feat_extract_activation]

    def forward(self, hidden_states):
        hidden_states = self.conv(hidden_states)

        hidden_states = hidden_states.transpose(-2, -1)
        hidden_states = self.layer_norm(hidden_states)
        hidden_states = hidden_states.transpose(-2, -1)

        hidden_states = self.activation(hidden_states)
        return hidden_states
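
# A minimal sketch of how the stack of `Data2VecAudioConvLayer`s downsamples raw audio.
# The kernel/stride values below are the usual wav2vec2-style feature-encoder defaults and
# are assumed to match `Data2VecAudioConfig`; substitute the values from your own config.
def _sketch_conv_downsampling(num_samples: int = 16000) -> int:
    conv_kernel = (10, 3, 3, 3, 3, 2, 2)  # assumed default kernel sizes
    conv_stride = (5, 2, 2, 2, 2, 2, 2)  # assumed default strides
    length = num_samples
    for kernel_size, stride in zip(conv_kernel, conv_stride):
        # standard Conv1d output-length formula (no padding, dilation 1)
        length = (length - kernel_size) // stride + 1
    return length  # 1 s of 16 kHz audio -> 49 frames (~20 ms per frame)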

class Data2VecAudioPadLayer(nn.Module):
    def __init__(self, num_conv_pos_embeddings):
        super().__init__()
        self.num_pad_remove = 1 if num_conv_pos_embeddings % 2 == 0 else 0

    def forward(self, hidden_states):
        if self.num_pad_remove > 0:
            hidden_states = hidden_states[:, :, : -self.num_pad_remove]
        return hidden_states


class Data2VecAudioPositionalConvLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.conv = nn.Conv1d(
            config.hidden_size,
            config.hidden_size,
            kernel_size=config.conv_pos_kernel_size,
            padding=config.conv_pos_kernel_size // 2,
            groups=config.num_conv_pos_embedding_groups,
        )

        self.padding = Data2VecAudioPadLayer(config.conv_pos_kernel_size)
        self.activation = ACT2FN[config.feat_extract_activation]
        # no learnable parameters
        self.layer_norm = nn.LayerNorm(config.hidden_size, elementwise_affine=False)

    def forward(self, hidden_states):
        hidden_states = self.conv(hidden_states)
        hidden_states = self.padding(hidden_states)

        hidden_states = hidden_states.transpose(1, 2)
        hidden_states = self.layer_norm(hidden_states)
        hidden_states = hidden_states.transpose(1, 2)
        hidden_states = self.activation(hidden_states)
        return hidden_states


class Data2VecAudioPositionalConvEmbedding(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.layers = nn.ModuleList(
            [Data2VecAudioPositionalConvLayer(config) for _ in range(config.num_conv_pos_embeddings)]
        )

    def forward(self, hidden_states):
        hidden_states = hidden_states.transpose(1, 2)
        for layer in self.layers:
            hidden_states = layer(hidden_states)
        hidden_states = hidden_states.transpose(1, 2)
        return hidden_states


class Data2VecAudioFeatureEncoder(nn.Module):
    """Construct the features from raw audio waveform"""

    def __init__(self, config):
        super().__init__()
        self.conv_layers = nn.ModuleList(
            [Data2VecAudioConvLayer(config, layer_id=i) for i in range(config.num_feat_extract_layers)]
        )
        self.gradient_checkpointing = False
        self._requires_grad = True
    def _freeze_parameters(self):
        for param in self.parameters():
            param.requires_grad = False
        self._requires_grad = False

    def forward(self, input_values):
        hidden_states = input_values[:, None]

        # make sure hidden_states require grad for gradient_checkpointing
        if self._requires_grad and self.training:
            hidden_states.requires_grad = True

        for conv_layer in self.conv_layers:
            hidden_states = conv_layer(hidden_states)

        return hidden_states


class Data2VecAudioFeatureProjection(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.layer_norm = nn.LayerNorm(config.conv_dim[-1], eps=config.layer_norm_eps)
        self.projection = nn.Linear(config.conv_dim[-1], config.hidden_size)
        self.dropout = nn.Dropout(config.feat_proj_dropout)

    def forward(self, hidden_states):
        # non-projected hidden states are needed for quantization
        norm_hidden_states = self.layer_norm(hidden_states)
        hidden_states = self.projection(norm_hidden_states)
        hidden_states = self.dropout(hidden_states)
        return hidden_states, norm_hidden_states


def eager_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: Optional[float] = None,
    dropout: float = 0.0,
    head_mask: Optional[torch.Tensor] = None,
    **kwargs,
):
    if scaling is None:
        scaling = query.size(-1) ** -0.5

    attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
    if attention_mask is not None:
        attn_weights = attn_weights + attention_mask

    attn_weights = nn.functional.softmax(attn_weights, dim=-1)

    if head_mask is not None:
        attn_weights = attn_weights * head_mask.view(1, -1, 1, 1)

    attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training)
    attn_output = torch.matmul(attn_weights, value)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights
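
# A minimal sketch of the shape contract of `eager_attention_forward`: projections come in as
# (batch, num_heads, seq_len, head_dim) and the output comes back as
# (batch, seq_len, num_heads, head_dim) together with the attention weights. Sizes are arbitrary.
def _sketch_eager_attention_shapes():
    module = nn.Module()  # only `module.training` is consulted (for dropout)
    query = torch.randn(1, 2, 5, 8)
    key = torch.randn(1, 2, 5, 8)
    value = torch.randn(1, 2, 5, 8)
    attn_output, attn_weights = eager_attention_forward(
        module, query, key, value, attention_mask=None, scaling=8**-0.5
    )
    return attn_output.shape, attn_weights.shape  # (1, 5, 2, 8), (1, 2, 5, 5)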

class Data2VecAudioAttention(nn.Module):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        dropout: float = 0.0,
        is_decoder: bool = False,
        bias: bool = True,
        is_causal: bool = False,
        config: Optional[Data2VecAudioConfig] = None,
    ):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads
        self.config = config

        if (self.head_dim * num_heads) != self.embed_dim:
            raise ValueError(
                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                f" and `num_heads`: {num_heads})."
            )
        self.scaling = self.head_dim**-0.5
        self.is_decoder = is_decoder
        self.is_causal = is_causal

        self.k_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)

    def forward(
        self,
        hidden_states: torch.Tensor,
        key_value_states: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = False,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
        """Input shape: Batch x Time x Channel"""

        # if key_value_states are provided this layer is used as a cross-attention layer
        is_cross_attention = key_value_states is not None

        bsz, tgt_len = hidden_states.shape[:-1]
        src_len = key_value_states.shape[1] if is_cross_attention else tgt_len

        q_input_shape = (bsz, tgt_len, -1, self.head_dim)
        kv_input_shape = (bsz, src_len, -1, self.head_dim)

        # get query proj
        query_states = self.q_proj(hidden_states).view(*q_input_shape).transpose(1, 2)

        current_states = key_value_states if is_cross_attention else hidden_states
        key_states = self.k_proj(current_states).view(*kv_input_shape).transpose(1, 2)
        value_states = self.v_proj(current_states).view(*kv_input_shape).transpose(1, 2)

        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.dropout,
            scaling=self.scaling,
            output_attentions=output_attentions,
            head_mask=layer_head_mask,
            **kwargs,
        )

        attn_output = attn_output.reshape(bsz, tgt_len, -1).contiguous()
        attn_output = self.out_proj(attn_output)

        return attn_output, attn_weights, None

class Data2VecAudioFeedForward(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.intermediate_dropout = nn.Dropout(config.activation_dropout)

        self.intermediate_dense = nn.Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]
        else:
            self.intermediate_act_fn = config.hidden_act

        self.output_dense = nn.Linear(config.intermediate_size, config.hidden_size)
        self.output_dropout = nn.Dropout(config.hidden_dropout)

    def forward(self, hidden_states):
        hidden_states = self.intermediate_dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        hidden_states = self.intermediate_dropout(hidden_states)

        hidden_states = self.output_dense(hidden_states)
        hidden_states = self.output_dropout(hidden_states)
        return hidden_states


class Data2VecAudioEncoderLayer(GradientCheckpointingLayer):
    def __init__(self, config):
        super().__init__()
        self.attention = Data2VecAudioAttention(
            embed_dim=config.hidden_size,
            num_heads=config.num_attention_heads,
            dropout=config.attention_dropout,
            is_decoder=False,
            config=config,
        )

        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.feed_forward = Data2VecAudioFeedForward(config)
        self.final_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)

    def forward(self, hidden_states, attention_mask=None, output_attentions=False):
        attn_residual = hidden_states
        hidden_states, attn_weights, _ = self.attention(
            hidden_states, attention_mask=attention_mask, output_attentions=output_attentions
        )
        hidden_states = self.dropout(hidden_states)
        hidden_states = attn_residual + hidden_states

        hidden_states = self.layer_norm(hidden_states)
        hidden_states = hidden_states + self.feed_forward(hidden_states)
        hidden_states = self.final_layer_norm(hidden_states)

        outputs = (hidden_states,)

        if output_attentions:
            outputs += (attn_weights,)

        return outputs


class Data2VecAudioEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.pos_conv_embed = Data2VecAudioPositionalConvEmbedding(config)
        self.layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layers = nn.ModuleList([Data2VecAudioEncoderLayer(config) for _ in range(config.num_hidden_layers)])
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.tensor,
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = True,
    ):
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if attention_mask is not None:
            # make sure padded tokens output 0
            expand_attention_mask = attention_mask.unsqueeze(-1).repeat(1, 1, hidden_states.shape[2])
            hidden_states[~expand_attention_mask] = 0

            attention_mask = self._update_full_mask(attention_mask, hidden_states)

        position_embeddings = self.pos_conv_embed(hidden_states)
        hidden_states = hidden_states + position_embeddings
        hidden_states = self.layer_norm(hidden_states)
        hidden_states = self.dropout(hidden_states)

        synced_gpus = is_deepspeed_zero3_enabled() or is_fsdp_managed_module(self)

        for layer in self.layers:
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            # add LayerDrop (see https://huggingface.co/papers/1909.11556 for description)
            dropout_probability = torch.rand([])

            skip_the_layer = self.training and (dropout_probability < self.config.layerdrop)
            if not skip_the_layer or synced_gpus:
                # under fsdp or deepspeed zero3 all gpus must run in sync
                layer_outputs = layer(
                    hidden_states, attention_mask=attention_mask, output_attentions=output_attentions
                )
                hidden_states = layer_outputs[0]

            if skip_the_layer:
                layer_outputs = (None, None)

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )

    def _update_full_mask(
        self,
        attention_mask: Union[torch.Tensor, None],
        inputs_embeds: torch.Tensor,
    ):
        if attention_mask is not None:
            if self.config._attn_implementation == "flash_attention_2":
                attention_mask = attention_mask if 0 in attention_mask else None
            elif self.config._attn_implementation == "sdpa":
                attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype)
            elif self.config._attn_implementation == "flex_attention":
                if isinstance(attention_mask, torch.Tensor):
                    attention_mask = make_flex_block_causal_mask(attention_mask, is_causal=False)
            else:
                attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype)

        return attention_mask


class Data2VecAudioAdapterLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.conv = nn.Conv1d(
            config.output_hidden_size,
            2 * config.output_hidden_size,
            config.adapter_kernel_size,
            stride=config.adapter_stride,
            padding=1,
        )

    def forward(self, hidden_states):
        hidden_states = self.conv(hidden_states)
        hidden_states = nn.functional.glu(hidden_states, dim=1)
        return hidden_states


class Data2VecAudioAdapter(nn.Module):
    def __init__(self, config):
        super().__init__()
        # feature dim might need to be down-projected
        if config.output_hidden_size != config.hidden_size:
            self.proj = nn.Linear(config.hidden_size, config.output_hidden_size)
            self.proj_layer_norm = nn.LayerNorm(config.output_hidden_size)
        else:
            self.proj = self.proj_layer_norm = None

        self.layers = nn.ModuleList(Data2VecAudioAdapterLayer(config) for _ in range(config.num_adapter_layers))
        self.layerdrop = config.layerdrop

    def forward(self, hidden_states):
        # down project hidden_states if necessary
        if self.proj is not None and self.proj_layer_norm is not None:
            hidden_states = self.proj(hidden_states)
            hidden_states = self.proj_layer_norm(hidden_states)

        hidden_states = hidden_states.transpose(1, 2)

        for layer in self.layers:
            layerdrop_prob = np.random.random()
            if not self.training or (layerdrop_prob > self.layerdrop):
                hidden_states = layer(hidden_states)

        hidden_states = hidden_states.transpose(1, 2)
        return hidden_states

@auto_docstring
class Data2VecAudioPreTrainedModel(PreTrainedModel):
    config: Data2VecAudioConfig
    base_model_prefix = "data2vec_audio"
    main_input_name = "input_values"
    supports_gradient_checkpointing = True
    _supports_flash_attn = True
    _supports_sdpa = True
    _supports_flex_attn = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, Data2VecAudioFeatureProjection):
            k = math.sqrt(1 / module.projection.in_features)
            nn.init.uniform_(module.projection.weight, a=-k, b=k)
            nn.init.uniform_(module.projection.bias, a=-k, b=k)
        elif isinstance(module, Data2VecAudioPositionalConvLayer):
            nn.init.constant_(module.conv.bias, 0)
        elif isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, (nn.LayerNorm, nn.GroupNorm)):
            if module.bias is not None:
                module.bias.data.zero_()
            if module.weight is not None:
                module.weight.data.fill_(1.0)
        elif isinstance(module, nn.Conv1d):
            nn.init.kaiming_normal_(module.weight)
            if module.bias is not None:
                k = math.sqrt(module.groups / (module.in_channels * module.kernel_size[0]))
                nn.init.uniform_(module.bias, a=-k, b=k)

    def _get_feat_extract_output_lengths(
        self, input_lengths: Union[torch.LongTensor, int], add_adapter: Optional[bool] = None
    ):
        """
        Computes the output length of the convolutional layers
        """
        add_adapter = self.config.add_adapter if add_adapter is None else add_adapter

        def _conv_out_length(input_length, kernel_size, stride):
            # 1D convolutional layer output length formula taken
            # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
            return torch.div(input_length - kernel_size, stride, rounding_mode="floor") + 1

        for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
            input_lengths = _conv_out_length(input_lengths, kernel_size, stride)

        if add_adapter:
            for _ in range(self.config.num_adapter_layers):
                input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride)

        return input_lengths

    def _get_feature_vector_attention_mask(
        self, feature_vector_length: int, attention_mask: torch.LongTensor, add_adapter=None
    ):
        # Effectively attention_mask.sum(-1), but not in-place to be able to run in inference mode
        non_padded_lengths = attention_mask.cumsum(dim=-1)[:, -1]

        output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter)
        output_lengths = output_lengths.to(torch.long)

        batch_size = attention_mask.shape[0]

        attention_mask = torch.zeros(
            (batch_size, feature_vector_length), dtype=attention_mask.dtype, device=attention_mask.device
        )
        # these two operations make sure that all values before the output lengths indices are attended to
        attention_mask[(torch.arange(attention_mask.shape[0], device=attention_mask.device), output_lengths - 1)] = 1
        attention_mask = attention_mask.flip([-1]).cumsum(-1).flip([-1]).bool()
        return attention_mask

def _compute_mask_indices(
    shape: tuple[int, int],
    mask_prob: float,
    mask_length: int,
    attention_mask: Optional[torch.LongTensor] = None,
    min_masks: int = 0,
) -> np.ndarray:
    """
    Computes random mask spans for a given shape. Used to implement [SpecAugment: A Simple Data Augmentation Method for
    ASR](https://huggingface.co/papers/1904.08779). Note that this method is not optimized to run on TPU and should be run on
    CPU as part of the preprocessing during training.

    Args:
        shape: The shape for which to compute masks. This should be of a tuple of size 2 where
               the first element is the batch size and the second element is the length of the axis to span.
        mask_prob:  The percentage of the whole axis (between 0 and 1) which will be masked. The number of
                    independently generated mask spans of length `mask_length` is computed by
                    `mask_prob*shape[1]/mask_length`. Note that due to overlaps, `mask_prob` is an upper bound and the
                    actual percentage will be smaller.
        mask_length: size of the mask
        min_masks: minimum number of masked spans
        attention_mask: A (right-padded) attention mask which independently shortens the feature axis of
                        each batch dimension.
    """
    batch_size, sequence_length = shape

    if mask_length < 1:
        raise ValueError("`mask_length` has to be bigger than 0.")

    if mask_length > sequence_length:
        raise ValueError(
            f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length}"
            f" and `sequence_length`: {sequence_length}`"
        )

    # epsilon is used for probabilistic rounding
    epsilon = np.random.rand(1).item()

    def compute_num_masked_span(input_length):
        """Given input length, compute how many spans should be masked"""
        num_masked_span = int(mask_prob * input_length / mask_length + epsilon)
        num_masked_span = max(num_masked_span, min_masks)

        # make sure num masked span <= sequence_length
        if num_masked_span * mask_length > sequence_length:
            num_masked_span = sequence_length // mask_length

        # make sure num_masked span is also <= input_length - (mask_length - 1)
        if input_length - (mask_length - 1) < num_masked_span:
            num_masked_span = max(input_length - (mask_length - 1), 0)

        return num_masked_span

    # compute number of masked spans in batch
    input_lengths = (
        attention_mask.detach().sum(-1).tolist()
        if attention_mask is not None
        else [sequence_length for _ in range(batch_size)]
    )

    # SpecAugment mask to fill
    spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=bool)
    spec_aug_mask_idxs = []

    max_num_masked_span = compute_num_masked_span(sequence_length)

    if max_num_masked_span == 0:
        return spec_aug_mask

    for input_length in input_lengths:
        # compute num of masked spans for this input
        num_masked_span = compute_num_masked_span(input_length)

        # get random indices to mask
        spec_aug_mask_idx = np.random.choice(
            np.arange(input_length - (mask_length - 1)), num_masked_span, replace=False
        )

        # pick first sampled index that will serve as a dummy index to pad the vector
        # to ensure same dimension for all batches due to probabilistic rounding
        if len(spec_aug_mask_idx) == 0:
            # this case can only happen if `input_length` is strictly smaller than
            # `sequence_length`, in which case the last token has to be a padding token
            dummy_mask_idx = sequence_length - 1
        else:
            dummy_mask_idx = spec_aug_mask_idx[0]

        spec_aug_mask_idx = np.concatenate(
            [spec_aug_mask_idx, np.ones(max_num_masked_span - num_masked_span, dtype=np.int32) * dummy_mask_idx]
        )
        spec_aug_mask_idxs.append(spec_aug_mask_idx)

    spec_aug_mask_idxs = np.array(spec_aug_mask_idxs)

    # expand masked indices to masked spans
    spec_aug_mask_idxs = np.broadcast_to(
        spec_aug_mask_idxs[:, :, None], (batch_size, max_num_masked_span, mask_length)
    )
    spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(batch_size, max_num_masked_span * mask_length)

    # add offset to the starting indexes so that the indexes now create a span
    offsets = np.arange(mask_length)[None, None, :]
    offsets = np.broadcast_to(offsets, (batch_size, max_num_masked_span, mask_length)).reshape(
        batch_size, max_num_masked_span * mask_length
    )
    spec_aug_mask_idxs = spec_aug_mask_idxs + offsets

    # ensure that we cannot have indices larger than sequence_length
    if spec_aug_mask_idxs.max() > sequence_length - 1:
        spec_aug_mask_idxs[spec_aug_mask_idxs > sequence_length - 1] = sequence_length - 1

    # scatter indices to mask
    np.put_along_axis(spec_aug_mask, spec_aug_mask_idxs, 1, -1)

    return spec_aug_mask
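
# A minimal sketch of the SpecAugment-style masking above: mask up to roughly 6.5% of 200
# frames in spans of 10 for a batch of 4. The values are illustrative, not defaults taken
# from any particular checkpoint.
def _sketch_compute_mask_indices():
    mask = _compute_mask_indices(shape=(4, 200), mask_prob=0.065, mask_length=10)
    # boolean numpy array of shape (4, 200); each row masks whole spans of 10 frames
    return mask.shape, mask.sum(axis=-1)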

Data2VecAudioBaseModelOutput = Wav2Vec2BaseModelOutput


@auto_docstring
class Data2VecAudioModel(Data2VecAudioPreTrainedModel):
    def __init__(self, config: Data2VecAudioConfig):
        super().__init__(config)
        self.config = config
        self.feature_extractor = Data2VecAudioFeatureEncoder(config)
        self.feature_projection = Data2VecAudioFeatureProjection(config)

        # model only needs masking vector if mask prob is > 0.0
        if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
            self.masked_spec_embed = nn.Parameter(torch.FloatTensor(config.hidden_size).uniform_())

        self.encoder = Data2VecAudioEncoder(config)

        self.adapter = Data2VecAudioAdapter(config) if config.add_adapter else None

        # Initialize weights and apply final processing
        self.post_init()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.feature_extractor._freeze_parameters()

    def _mask_hidden_states(
        self,
        hidden_states: torch.FloatTensor,
        mask_time_indices: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.LongTensor] = None,
    ):
        """
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://huggingface.co/papers/1904.08779).
        """

        # `config.apply_spec_augment` can set masking to False
        if not getattr(self.config, "apply_spec_augment", True):
            return hidden_states

        # generate indices & apply SpecAugment along time axis
        batch_size, sequence_length, hidden_size = hidden_states.size()

        if mask_time_indices is not None:
            # apply SpecAugment along time axis with given mask_time_indices
            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
        elif self.config.mask_time_prob > 0 and self.training:
            mask_time_indices = _compute_mask_indices(
                (batch_size, sequence_length),
                mask_prob=self.config.mask_time_prob,
                mask_length=self.config.mask_time_length,
                attention_mask=attention_mask,
                min_masks=self.config.mask_time_min_masks,
            )
            mask_time_indices = torch.tensor(mask_time_indices, device=hidden_states.device, dtype=torch.bool)
            hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)

        if self.config.mask_feature_prob > 0 and self.training:
            # generate indices & apply SpecAugment along feature axis
            mask_feature_indices = _compute_mask_indices(
                (batch_size, hidden_size),
                mask_prob=self.config.mask_feature_prob,
                mask_length=self.config.mask_feature_length,
                min_masks=self.config.mask_feature_min_masks,
            )
            mask_feature_indices = torch.tensor(mask_feature_indices, device=hidden_states.device, dtype=torch.bool)
            mask_feature_indices = mask_feature_indices[:, None].expand(-1, sequence_length, -1)
            hidden_states[mask_feature_indices] = 0

        return hidden_states

    @auto_docstring
    def forward(
        self,
        input_values: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        mask_time_indices: Optional[torch.FloatTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, Data2VecAudioBaseModelOutput]:
        r"""
        mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
            masked extracted features in *config.proj_codevector_dim* space.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        extract_features = self.feature_extractor(input_values)
        extract_features = extract_features.transpose(1, 2)

        if attention_mask is not None:
            # compute reduced attention_mask corresponding to feature vectors
            attention_mask = self._get_feature_vector_attention_mask(
                extract_features.shape[1], attention_mask, add_adapter=False
            )

        hidden_states, extract_features = self.feature_projection(extract_features)
        hidden_states = self._mask_hidden_states(
            hidden_states, mask_time_indices=mask_time_indices, attention_mask=attention_mask
        )

        encoder_outputs = self.encoder(
            hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = encoder_outputs[0]

        if self.adapter is not None:
            hidden_states = self.adapter(hidden_states)

        if not return_dict:
            return (hidden_states, extract_features) + encoder_outputs[1:]

        return Data2VecAudioBaseModelOutput(
            last_hidden_state=hidden_states,
            extract_features=extract_features,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )
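
# A minimal usage sketch for the base model above: extract frame-level features from 1 s of
# (fake) 16 kHz audio. The checkpoint name "facebook/data2vec-audio-base-960h" and the
# AutoProcessor pairing are assumptions about the surrounding setup.
def _sketch_extract_features():
    from transformers import AutoProcessor

    processor = AutoProcessor.from_pretrained("facebook/data2vec-audio-base-960h")
    model = Data2VecAudioModel.from_pretrained("facebook/data2vec-audio-base-960h")
    waveform = np.random.randn(16000).astype(np.float32)  # stand-in for real audio
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.shape  # (1, 49, hidden_size)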

_HIDDEN_STATES_START_POSITION = 2


@auto_docstring(
    custom_intro="""
    Data2VecAudio Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
    """
)
class Data2VecAudioForCTC(Data2VecAudioPreTrainedModel):
    def __init__(self, config, target_lang: Optional[str] = None):
        r"""
        target_lang (`str`, *optional*):
            Language id of adapter weights. Adapter weights are stored in the format adapter.<lang>.safetensors or
            adapter.<lang>.bin. Only relevant when using an instance of [`Data2VecAudioForCTC`] with adapters. Uses 'eng' by
            default.
        """
        super().__init__(config)

        self.data2vec_audio = Data2VecAudioModel(config)
        self.dropout = nn.Dropout(config.final_dropout)

        self.target_lang = target_lang

        if config.vocab_size is None:
            raise ValueError(
                f"You are trying to instantiate {self.__class__} with a configuration that does not define the"
                " vocabulary size of the language model head. Please instantiate the model as follows:"
                " `Data2VecAudioForCTC.from_pretrained(..., vocab_size=vocab_size)`. or define `vocab_size` of your"
                " model's configuration."
            )
        output_hidden_size = (
            config.output_hidden_size if hasattr(config, "add_adapter") and config.add_adapter else config.hidden_size
        )
        self.lm_head = nn.Linear(output_hidden_size, config.vocab_size)

        # Initialize weights and apply final processing
        self.post_init()

    def freeze_feature_extractor(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        warnings.warn(
            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
            "Please use the equivalent `freeze_feature_encoder` method instead.",
            FutureWarning,
        )
        self.freeze_feature_encoder()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.data2vec_audio.feature_extractor._freeze_parameters()

    @auto_docstring
    def forward(
        self,
        input_values: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
    ) -> Union[tuple, CausalLMOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
            Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
            the sequence length of the output logits. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`.
            All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
            config.vocab_size - 1]`.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if labels is not None and labels.max() >= self.config.vocab_size:
            raise ValueError(f"Label values must be <= vocab_size: {self.config.vocab_size}")

        outputs = self.data2vec_audio(
            input_values,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = outputs[0]
        hidden_states = self.dropout(hidden_states)

        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            # retrieve loss input_lengths from attention_mask
            attention_mask = (
                attention_mask if attention_mask is not None else torch.ones_like(input_values, dtype=torch.long)
            )
            input_lengths = self._get_feat_extract_output_lengths(attention_mask.sum(-1)).to(torch.long)

            # assuming that padded tokens are filled with -100 when not being attended to
            labels_mask = labels >= 0
            target_lengths = labels_mask.sum(-1)
            flattened_targets = labels.masked_select(labels_mask)

            # ctc_loss doesn't support fp16
            log_probs = nn.functional.log_softmax(logits, dim=-1, dtype=torch.float32).transpose(0, 1)

            with torch.backends.cudnn.flags(enabled=False):
                loss = nn.functional.ctc_loss(
                    log_probs,
                    flattened_targets,
                    input_lengths,
                    target_lengths,
                    blank=self.config.pad_token_id,
                    reduction=self.config.ctc_loss_reduction,
                    zero_infinity=self.config.ctc_zero_infinity,
                )

        if not return_dict:
            output = (logits,) + outputs[_HIDDEN_STATES_START_POSITION:]
            return ((loss,) + output) if loss is not None else output

        return CausalLMOutput(
            loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )
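
# A minimal usage sketch for CTC transcription with the head above: greedy (argmax) decoding
# of one utterance. The checkpoint name is an assumption; any fine-tuned Data2VecAudio CTC
# checkpoint with a matching processor works the same way.
def _sketch_ctc_transcription():
    from transformers import AutoProcessor

    processor = AutoProcessor.from_pretrained("facebook/data2vec-audio-base-960h")
    model = Data2VecAudioForCTC.from_pretrained("facebook/data2vec-audio-base-960h")
    waveform = np.random.randn(16000).astype(np.float32)  # replace with real 16 kHz audio
    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)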

@auto_docstring(
    custom_intro="""
    Data2VecAudio Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like
    SUPERB Keyword Spotting.
    """
)
class Data2VecAudioForSequenceClassification(Data2VecAudioPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        if hasattr(config, "add_adapter") and config.add_adapter:
            raise ValueError(
                "Sequence classification does not support the use of Data2VecAudio adapters (config.add_adapter=True)"
            )
        self.data2vec_audio = Data2VecAudioModel(config)
        num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
        if config.use_weighted_layer_sum:
            self.layer_weights = nn.Parameter(torch.ones(num_layers) / num_layers)
        self.projector = nn.Linear(config.hidden_size, config.classifier_proj_size)
        self.classifier = nn.Linear(config.classifier_proj_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def freeze_feature_extractor(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameters will
        not be updated during training.
        """
        warnings.warn(
            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
            "Please use the equivalent `freeze_feature_encoder` method instead.",
            FutureWarning,
        )
        self.freeze_feature_encoder()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.data2vec_audio.feature_extractor._freeze_parameters()

    def freeze_base_model(self):
        """
        Calling this function will disable the gradient computation for the base model so that its parameters will not
        be updated during training. Only the classification head will be updated.
        """
        for param in self.data2vec_audio.parameters():
            param.requires_grad = False

    @auto_docstring
    def forward(
        self,
        input_values: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
    ) -> Union[tuple, SequenceClassifierOutput]:
        r"""
        input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Float values of input raw speech waveform. Values can be obtained by loading a `.flac` or `.wav` audio file
            into an array of type `list[float]`, a `numpy.ndarray` or a `torch.Tensor`, *e.g.* via the torchcodec library
            (`pip install torchcodec`) or the soundfile library (`pip install soundfile`).
            To prepare the array into `input_values`, the [`AutoProcessor`] should be used for padding and conversion
            into a tensor of type `torch.FloatTensor`. See [`Data2VecAudioProcessor.__call__`] for details.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states

        outputs = self.data2vec_audio(
            input_values,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if self.config.use_weighted_layer_sum:
            hidden_states = outputs[_HIDDEN_STATES_START_POSITION]
            hidden_states = torch.stack(hidden_states, dim=1)
            norm_weights = nn.functional.softmax(self.layer_weights, dim=-1)
            hidden_states = (hidden_states * norm_weights.view(-1, 1, 1)).sum(dim=1)
        else:
            hidden_states = outputs[0]

        hidden_states = self.projector(hidden_states)
        if attention_mask is None:
            pooled_output = hidden_states.mean(dim=1)
        else:
            padding_mask = self._get_feature_vector_attention_mask(hidden_states.shape[1], attention_mask)
            expand_padding_mask = padding_mask.unsqueeze(-1).repeat(1, 1, hidden_states.shape[2])
            hidden_states[~expand_padding_mask] = 0.0
            pooled_output = hidden_states.sum(dim=1) / padding_mask.sum(dim=1).view(-1, 1)

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[_HIDDEN_STATES_START_POSITION:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
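
# A minimal usage sketch for the sequence-classification head above. Loading from a plain
# pretrained encoder ("facebook/data2vec-audio-base" is an assumed checkpoint name) leaves
# the classifier randomly initialized, so the predicted label is only illustrative until
# the head is fine-tuned.
def _sketch_audio_classification():
    from transformers import AutoFeatureExtractor

    feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/data2vec-audio-base")
    model = Data2VecAudioForSequenceClassification.from_pretrained("facebook/data2vec-audio-base", num_labels=8)
    waveform = np.random.randn(16000).astype(np.float32)
    inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    return logits.argmax(dim=-1)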

@auto_docstring
class Data2VecAudioForAudioFrameClassification(Data2VecAudioPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        if hasattr(config, "add_adapter") and config.add_adapter:
            raise ValueError(
                "Audio frame classification does not support the use of Data2VecAudio adapters (config.add_adapter=True)"
            )
        self.data2vec_audio = Data2VecAudioModel(config)
        num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
        if config.use_weighted_layer_sum:
            self.layer_weights = nn.Parameter(torch.ones(num_layers) / num_layers)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.num_labels = config.num_labels

        self.init_weights()

    def freeze_feature_extractor(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        warnings.warn(
            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
            "Please use the equivalent `freeze_feature_encoder` method instead.",
            FutureWarning,
        )
        self.freeze_feature_encoder()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.data2vec_audio.feature_extractor._freeze_parameters()

    def freeze_base_model(self):
        """
        Calling this function will disable the gradient computation for the base model so that its parameters will not
        be updated during training. Only the classification head will be updated.
        """
        for param in self.data2vec_audio.parameters():
            param.requires_grad = False

    @auto_docstring
    def forward(
        self,
        input_values: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, TokenClassifierOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states

        outputs = self.data2vec_audio(
            input_values,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if self.config.use_weighted_layer_sum:
            hidden_states = outputs[_HIDDEN_STATES_START_POSITION]
            hidden_states = torch.stack(hidden_states, dim=1)
            norm_weights = nn.functional.softmax(self.layer_weights, dim=-1)
            hidden_states = (hidden_states * norm_weights.view(-1, 1, 1)).sum(dim=1)
        else:
            hidden_states = outputs[0]

        logits = self.classifier(hidden_states)

        loss = None
        if labels is not None:
            loss_fct = CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.num_labels), torch.argmax(labels.view(-1, self.num_labels), axis=1))

        if not return_dict:
            output = (logits,) + outputs[_HIDDEN_STATES_START_POSITION:]
            return output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


class AMSoftmaxLoss(nn.Module):
    def __init__(self, input_dim, num_labels, scale=30.0, margin=0.4):
        super().__init__()
        self.scale = scale
        self.margin = margin
        self.num_labels = num_labels
        self.weight = nn.Parameter(torch.randn(input_dim, num_labels), requires_grad=True)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, hidden_states, labels):
        labels = labels.flatten()
        weight = nn.functional.normalize(self.weight, dim=0)
        hidden_states = nn.functional.normalize(hidden_states, dim=1)
        cos_theta = torch.mm(hidden_states, weight)
        psi = cos_theta - self.margin

        onehot = nn.functional.one_hot(labels, self.num_labels)
        logits = self.scale * torch.where(onehot.bool(), psi, cos_theta)
        loss = self.loss(logits, labels)

        return loss


class TDNNLayer(nn.Module):
    def __init__(self, config, layer_id=0):
        super().__init__()
        self.in_conv_dim = config.tdnn_dim[layer_id - 1] if layer_id > 0 else config.tdnn_dim[layer_id]
        self.out_conv_dim = config.tdnn_dim[layer_id]
        self.kernel_size = config.tdnn_kernel[layer_id]
        self.dilation = config.tdnn_dilation[layer_id]

        self.kernel = nn.Linear(self.in_conv_dim * self.kernel_size, self.out_conv_dim)
        self.activation = nn.ReLU()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        if is_peft_available():
            from peft.tuners.lora import LoraLayer

            if isinstance(self.kernel, LoraLayer):
                warnings.warn(
                    "Detected LoRA on TDNNLayer. LoRA weights won't be applied due to optimization. "
                    "You should exclude TDNNLayer from LoRA's target modules.",
                )

        # for backwards compatibility, we keep nn.Linear but call F.conv1d for speed up
        hidden_states = hidden_states.transpose(1, 2)
        weight = self.kernel.weight.view(self.out_conv_dim, self.kernel_size, self.in_conv_dim).transpose(1, 2)
        hidden_states = nn.functional.conv1d(hidden_states, weight, self.kernel.bias, dilation=self.dilation)
        hidden_states = hidden_states.transpose(1, 2)

        hidden_states = self.activation(hidden_states)
        return hidden_states

@auto_docstring(
    custom_intro="""
    Data2VecAudio Model with an XVector feature extraction head on top for tasks like Speaker Verification.
    """
)
class Data2VecAudioForXVector(Data2VecAudioPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.data2vec_audio = Data2VecAudioModel(config)
        num_layers = config.num_hidden_layers + 1  # transformer layers + input embeddings
        if config.use_weighted_layer_sum:
            self.layer_weights = nn.Parameter(torch.ones(num_layers) / num_layers)
        self.projector = nn.Linear(config.hidden_size, config.tdnn_dim[0])

        tdnn_layers = [TDNNLayer(config, i) for i in range(len(config.tdnn_dim))]
        self.tdnn = nn.ModuleList(tdnn_layers)

        self.feature_extractor = nn.Linear(config.tdnn_dim[-1] * 2, config.xvector_output_dim)
        self.classifier = nn.Linear(config.xvector_output_dim, config.xvector_output_dim)

        self.objective = AMSoftmaxLoss(config.xvector_output_dim, config.num_labels)

        self.init_weights()

    def freeze_feature_extractor(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        warnings.warn(
            "The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5. "
            "Please use the equivalent `freeze_feature_encoder` method instead.",
            FutureWarning,
        )
        self.freeze_feature_encoder()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.data2vec_audio.feature_extractor._freeze_parameters()

    def freeze_base_model(self):
        """
        Calling this function will disable the gradient computation for the base model so that its parameters will not
        be updated during training. Only the classification head will be updated.
        """
        for param in self.data2vec_audio.parameters():
            param.requires_grad = False

    def _get_tdnn_output_lengths(self, input_lengths: Union[torch.LongTensor, int]):
        """
        Computes the output length of the TDNN layers
        """

        def _conv_out_length(input_length, kernel_size, stride):
            # 1D convolutional layer output length formula taken
            # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
            return (input_length - kernel_size) // stride + 1

        for kernel_size in self.config.tdnn_kernel:
            input_lengths = _conv_out_length(input_lengths, kernel_size, 1)

        return input_lengths

    @auto_docstring
    def forward(
        self,
        input_values: Optional[torch.Tensor],
        attention_mask: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: Optional[torch.Tensor] = None,
    ) -> Union[tuple, XVectorOutput]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = True if self.config.use_weighted_layer_sum else output_hidden_states

        outputs = self.data2vec_audio(
            input_values,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if self.config.use_weighted_layer_sum:
            hidden_states = outputs[_HIDDEN_STATES_START_POSITION]
            hidden_states = torch.stack(hidden_states, dim=1)
            norm_weights = nn.functional.softmax(self.layer_weights, dim=-1)
            hidden_states = (hidden_states * norm_weights.view(-1, 1, 1)).sum(dim=1)
        else:
            hidden_states = outputs[0]

        hidden_states = self.projector(hidden_states)

        for tdnn_layer in self.tdnn:
            hidden_states = tdnn_layer(hidden_states)

        # Statistic Pooling
        if attention_mask is None:
            mean_features = hidden_states.mean(dim=1)
            std_features = hidden_states.std(dim=1)
        else:
            feat_extract_output_lengths = self._get_feat_extract_output_lengths(attention_mask.sum(dim=1))
            tdnn_output_lengths = self._get_tdnn_output_lengths(feat_extract_output_lengths)
            mean_features = []
            std_features = []
            for i, length in enumerate(tdnn_output_lengths):
                mean_features.append(hidden_states[i, :length].mean(dim=0))
                std_features.append(hidden_states[i, :length].std(dim=0))
            mean_features = torch.stack(mean_features)
            std_features = torch.stack(std_features)
        statistic_pooling = torch.cat([mean_features, std_features], dim=-1)

        output_embeddings = self.feature_extractor(statistic_pooling)
        logits = self.classifier(output_embeddings)

        loss = None
        if labels is not None:
            loss = self.objective(logits, labels)

        if not return_dict:
            output = (logits, output_embeddings) + outputs[_HIDDEN_STATES_START_POSITION:]
            return ((loss,) + output) if loss is not None else output

        return XVectorOutput(
            loss=loss,
            logits=logits,
            embeddings=output_embeddings,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


__all__ = [
    "Data2VecAudioForAudioFrameClassification",
    "Data2VecAudioForCTC",
    "Data2VecAudioForSequenceClassification",
    "Data2VecAudioForXVector",
    "Data2VecAudioModel",
    "Data2VecAudioPreTrainedModel",
]