
    rh$                        d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ d	d
lmZmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d de      Z G d de      Z G d dej8                        Z G d dej8                        Z G d deej8                        Z G d de      Z  G d de      Z! G d de      Z" G d dee      Z#e	Z$ G d d e#e      Z% G d! d"e#e      Z& G d# d$e      Z' G d% d&e      Z( G d' d(e      Z)g d)Z*y)*zPyTorch Data2VecText model.    N)nn   )ACT2FN)GradientCheckpointingLayer)Wav2Vec2BaseModelOutput)PreTrainedModel   )Wav2Vec2AdapterWav2Vec2EncoderWav2Vec2FeatureEncoderWav2Vec2FeatureProjection#Wav2Vec2ForAudioFrameClassificationWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ForXVectorWav2Vec2ModelWav2Vec2PreTrainedModelWav2Vec2SamePadLayer   )Data2VecAudioConfigc                   &     e Zd Zd fd	Zd Z xZS )Data2VecAudioConvLayerc                    t         |           |dkD  r|j                  |dz
     nd| _        |j                  |   | _        t        j                  | j                  | j                  |j                  |   |j                  |   |j                        | _
        t        j                  | j                  d      | _        t        |j                     | _        y )Nr   r   )kernel_sizestridebiasTelementwise_affine)super__init__conv_dimin_conv_dimout_conv_dimr   Conv1dconv_kernelconv_stride	conv_biasconv	LayerNorm
layer_normr   feat_extract_activation
activation)selfconfiglayer_id	__class__s      /var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/data2vec/modular_data2vec_audio.pyr    zData2VecAudioConvLayer.__init__+   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 ,,t'8'8TR !?!?@    c                     | j                  |      }|j                  dd      }| j                  |      }|j                  dd      }| j                  |      }|S )N)r(   	transposer*   r,   r-   hidden_statess     r1   forwardzData2VecAudioConvLayer.forward:   sV    		-0%//B76%//B76r2   )r   __name__
__module____qualname__r    r9   __classcell__r0   s   @r1   r   r   *   s    Ar2   r   c                       e Zd Zy)Data2VecAudioPadLayerNr;   r<   r=    r2   r1   rA   rA   E       r2   rA   c                   $     e Zd Z fdZd Z xZS ) Data2VecAudioPositionalConvLayerc                 z   t         |           t        j                  |j                  |j                  |j
                  |j
                  dz  |j                        | _        t        |j
                        | _	        t        |j                     | _        t        j                  |j                  d      | _        y )Nr	   )r   paddinggroupsFr   )r   r    r   r$   hidden_sizeconv_pos_kernel_sizenum_conv_pos_embedding_groupsr(   rA   rH   r   r+   r,   r)   r*   )r-   r.   r0   s     r1   r    z)Data2VecAudioPositionalConvLayer.__init__J   s    II33//1477
	 -V-H-HI !?!?@,,v'9'9eTr2   c                     | j                  |      }| j                  |      }|j                  dd      }| j                  |      }|j                  dd      }| j	                  |      }|S Nr   r	   )r(   rH   r6   r*   r,   r7   s     r1   r9   z(Data2VecAudioPositionalConvLayer.forwardY   sd    		-0]3%//156%//156r2   r:   r?   s   @r1   rF   rF   I   s    Ur2   rF   c                   $     e Zd Z fdZd Z xZS )$Data2VecAudioPositionalConvEmbeddingc                     t         |           t        j                  t	        |j
                        D cg c]  }t        |       c}      | _        y c c}w )N)r   r    r   
ModuleListrangenum_conv_pos_embeddingsrF   layers)r-   r.   _r0   s      r1   r    z-Data2VecAudioPositionalConvEmbedding.__init__e   s@    mm?DVEcEc?de!-f5e
es   Ac                     |j                  dd      }| j                  D ]
  } ||      } |j                  dd      }|S rN   )r6   rU   )r-   r8   layers      r1   r9   z,Data2VecAudioPositionalConvEmbedding.forwardk   sI    %//15[[ 	1E!-0M	1%//15r2   r:   r?   s   @r1   rP   rP   d   s    
r2   rP   c                       e Zd Zd Zy)Data2VecAudioFeatureEncoderc           	          t         j                  j                          t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        d| _        d| _	        y c c}w )N)r/   FT)
r   Moduler    rR   rS   num_feat_extract_layersr   conv_layersgradient_checkpointing_requires_grad)r-   r.   is      r1   r    z$Data2VecAudioFeatureEncoder.__init__t   s\    
		==AFvGeGeAfgA#FQ7g
 ',#" hs   A4N)r;   r<   r=   r    rC   r2   r1   rZ   rZ   s   s    #r2   rZ   c                       e Zd Zy)Data2VecAudioFeatureProjectionNrB   rC   r2   r1   rc   rc   }   rD   r2   rc   c                       e Zd Zy)Data2VecAudioEncoderNrB   rC   r2   r1   re   re      rD   r2   re   c                       e Zd Zy)Data2VecAudioAdapterNrB   rC   r2   r1   rg   rg      rD   r2   rg   c                   H    e Zd ZU eed<   dZdZdZdZdZ	dZ
d Zd Zd Zd Zy	)
Data2VecAudioPreTrainedModelr.   data2vec_audioinput_valuesTc                    t        |t              rt        j                  d|j                  j
                  z        }t        j                  j                  |j                  j                  | |       t        j                  j                  |j                  j                  | |       yt        |t              r5t        j                  j                  |j                  j                  d       yt        |t        j                        rm|j                  j                  j!                  d| j"                  j$                         |j                  %|j                  j                  j'                          yyt        |t        j(                  t        j*                  f      rc|j                  $|j                  j                  j'                          |j                  &|j                  j                  j-                  d       yyt        |t        j.                        rt        j                  j1                  |j                         |j                  jt        j                  |j2                  |j4                  |j6                  d   z  z        }t        j                  j                  |j                  | |       yyy)zInitialize the weightsr   )abr           )meanstdNg      ?)
isinstancerc   mathsqrt
projectionin_featuresr   inituniform_weightr   rF   	constant_r(   Lineardatanormal_r.   initializer_rangezero_r)   	GroupNormfill_r$   kaiming_normal_rI   in_channelsr   )r-   moduleks      r1   _init_weightsz*Data2VecAudioPreTrainedModel._init_weights   s   f<=		!f//;;;<AGGV..55!qAGGV..33rQ? @AGGfkk..2		*MM&&CT[[5R5R&S{{&  &&( 'r|| <={{&  &&(}}(""((- )		*GG##FMM2{{&IIfmmv/A/AFDVDVWXDY/YZ[  a 8 ' +r2   c                     t        d      NzNot needed for Data2VecAudioAttributeErrorr-   s    r1   _get_adaptersz*Data2VecAudioPreTrainedModel._get_adapters       ;<<r2   c                     t        d      r   r   r   s    r1   init_adapter_layersz0Data2VecAudioPreTrainedModel.init_adapter_layers   r   r2   c                     t        d      r   r   r   s    r1   load_adapterz)Data2VecAudioPreTrainedModel.load_adapter   r   r2   N)r;   r<   r=   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   r   r   r   rC   r2   r1   ri   ri      s>    ($O&*#N92===r2   ri   c                   6     e Zd ZdefdZd Zd Z fdZ xZS )Data2VecAudioModelr.   c                    t         j                  |       || _        t        |      | _        t        |      | _        |j                  dkD  s|j                  dkD  rEt        j                  t        j                  |j                        j                               | _        t!        |      | _        |j$                  rt'        |      nd | _        | j+                          y )Nro   )ri   r    r.   rZ   feature_extractorrc   feature_projectionmask_time_probmask_feature_probr   	ParametertorchTensorrJ   rx   masked_spec_embedre   encoderadd_adapterrg   adapter	post_init)r-   r.   s     r1   r    zData2VecAudioModel.__init__   s    $--f5!<V!D"@"H   3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"+F37=7I7I+F3t 	r2   c                     t        d      r   r   r   s    r1   freeze_feature_extractorz+Data2VecAudioModel.freeze_feature_extractor   r   r2   c                 8    | j                   j                          y)z
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        N)r   _freeze_parametersr   s    r1   freeze_feature_encoderz)Data2VecAudioModel.freeze_feature_encoder   s    
 	113r2   c                 "    t        |   di |S NrC   r   r9   r-   super_kwargsr0   s     r1   r9   zData2VecAudioModel.forward       w...r2   )	r;   r<   r=   r   r    r   r   r9   r>   r?   s   @r1   r   r      s$    2 "=4/ /r2   r   c                   0     e Zd Zd Zd Zd Z fdZ xZS )Data2VecAudioForCTCc                    t         j                  |       t        |      | _        t	        j
                  |j                        | _        |j                  t        d| j                   d      t        |d      r|j                  r|j                  n|j                  }t	        j                  ||j                        | _        | j#                          y )NzYou are trying to instantiate z with a configuration that does not define the vocabulary size of the language model head. Please instantiate the model as follows: `Data2VecAudioForCTC.from_pretrained(..., vocab_size=vocab_size)`. or define `vocab_size` of your model's configuration.r   )ri   r    r   rj   r   Dropoutfinal_dropoutdropout
vocab_size
ValueErrorr0   hasattrr   output_hidden_sizerJ   r{   lm_headr   )r-   r.   r   s      r1   r    zData2VecAudioForCTC.__init__   s    $--f508zz&"6"67$00@ AH H  *1)GFL^L^F%%djdvdv 	 yy!3V5F5FG 	r2   c                     t        d      r   r   r   s    r1   freeze_base_modelz%Data2VecAudioForCTC.freeze_base_model   r   r2   c                     t        d      r   r   r   s    r1   tie_weightszData2VecAudioForCTC.tie_weights   r   r2   c                 "    t        |   di |S r   r   r   s     r1   r9   zData2VecAudioForCTC.forward   r   r2   )r;   r<   r=   r    r   r   r9   r>   r?   s   @r1   r   r      s    *==/ /r2   r   c                       e Zd Zy)&Data2VecAudioForSequenceClassificationNrB   rC   r2   r1   r   r      rD   r2   r   c                       e Zd Zy)(Data2VecAudioForAudioFrameClassificationNrB   rC   r2   r1   r   r      rD   r2   r   c                       e Zd Zy)Data2VecAudioForXVectorNrB   rC   r2   r1   r   r      rD   r2   r   )r   r   r   r   r   ri   )+__doc__rs   r   r   activationsr   modeling_layersr   modeling_outputsr   modeling_utilsr   wav2vec2.modeling_wav2vec2r
   r   r   r   r   r   r   r   r   r   r   configuration_data2vec_audior   r   rA   r\   rF   rP   rZ   rc   re   rg   ri   Data2VecAudioBaseModelOutputr   r   r   r   r   __all__rC   r2   r1   <module>r      s   "    ! 9 7 -    >7 6	0 	ryy 6299 #"8")) #	%> 		? 		? 	)=?4K )=X  7 /5} /@/6 /@	-N 		/R 		0 	r2   