
    rh.                        d Z ddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ d
dlmZmZmZmZmZmZmZ ddlmZ dZ G d dej8                        Z G d de      Z G d de      Z G d dej8                        Z  G d de      Z! G d de      Z"e G d de             Z# G d dee#      Z$ G d de      Z% G d  d!e      Z&g d"Z'y)#zPyTorch Hubert model.    )OptionalUnionN   )ACT2FN)is_deepspeed_zero3_enabled)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2EncoderWav2Vec2EncoderStableLayerNormWav2Vec2FeatureEncoderWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2ModelWav2Vec2SamePadLayer   )HubertConfigc                   $     e Zd Z fdZd Z xZS )HubertPositionalConvEmbeddingc                    t         |           t        j                  |j                  |j                  |j
                  |j
                  dz  |j                        | _        d | _        |j                  r&t        j                  |j                        | _        nt        j                  j                  }t        t        j                  j                  d      r$t        j                  j                  j                  }t               r(dd l}|j"                  j%                  | j                  j&                  d      5   || j                  dd      | _        d d d        t        | j                  d      rU| j                  j                  j&                  j(                  }| j                  j                  j&                  j*                  }n,| j                  j,                  }| j                  j.                  }|j"                  j1                  | |       |j"                  j1                  | |       n || j                  dd      | _        t3        |j
                        | _        t6        |j8                     | _        y # 1 sw Y   'xY w)	Nr   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr   hasattrr!   r   	deepspeedzeroGatheredParametersr   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr   r/   r4   r5   	__class__s         |/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/hubert/modular_hubert.pyr#   z&HubertPositionalConvEmbedding.__init__+   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@ hh77CC)+ ^^66tyy7G7GWX6Y M +DIIH! LDIM499&89#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@M Ms   ?I??J	c                     |j                  dd      }| j                  | j                  |      }| j                  |      }| j                  |      }| j	                  |      }|j                  dd      }|S )Nr   r   )	transposer*   r)   r   r9   r:   hidden_statess     r=   forwardz%HubertPositionalConvEmbedding.forwardP   sn    %//15??& OOM:M		-0]36%//15    __name__
__module____qualname__r#   rB   __classcell__r<   s   @r=   r   r   *   s    #AJ	rC   r   c                       e Zd Zy)r7   NrE   rF   rG    rC   r=   r7   r7   \       rC   r7   c                       e Zd Zy)HubertFeatureEncoderNrK   rL   rC   r=   rO   rO   `   rM   rC   rO   c                   $     e Zd Z fdZd Z xZS )HubertFeatureProjectionc                 n   t         |           |j                  | _        | j                  r3t        j                  |j
                  d   |j                        | _        t        j                  |j
                  d   |j                        | _
        t        j                  |j                        | _        y )N)eps)r"   r#   feat_proj_layer_normr$   	LayerNormconv_dimlayer_norm_eps
layer_normLinearr&   
projectionDropoutfeat_proj_dropoutdropoutr:   r;   r<   s     r=   r#   z HubertFeatureProjection.__init__e   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rC   c                     | j                   r| j                  |      }| j                  |      }| j                  |      }|S )N)rU   rY   r[   r^   r@   s     r=   rB   zHubertFeatureProjection.forwardm   s;    $$ OOM:M6]3rC   rD   rI   s   @r=   rQ   rQ   d   s    <rC   rQ   c                       e Zd Zy)HubertEncoderNrK   rL   rC   r=   rb   rb   v   rM   rC   rb   c                       e Zd Zy)HubertEncoderStableLayerNormNrK   rL   rC   r=   rd   rd   z   rM   rC   rd   c                       e Zd ZU eed<   dZdZdZdZdZ	dZ
d Zdeej                  ef   fdZded	ej                  fd
Zy)HubertPreTrainedModelr;   hubertinput_valuesTc                 z   t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                  t        j                  t        j                  f      rJ|j                  j                  j                          |j                  j                  j                  d       yt        |t        j                        r_t               rddl}t#        |d      r|t#        |d      rp|j$                  j'                  |j(                  |j*                  gd      5  t        j,                  j/                  |j                  j                         ddd       n|j$                  j'                  |j                  d      5  t        j,                  j/                  |j                  j                         ddd       n3t        j,                  j/                  |j                  j                         |j                  %|j                  j                  j                          yyt        |t0              r2t#        |d	      r%|j2                  j                  j5                          yyt        |t6              rMt#        |d
      r@|j8                  j                  j                  d| j                  j:                  dz   z         yyy# 1 sw Y   xY w# 1 sw Y   xY w)zInitialize the weights        )meanstdNg      ?r   r5   r4   r   masked_spec_embedlayer_weightsr   )
isinstancer$   rZ   r   datanormal_r;   initializer_rangebiaszero_rV   	GroupNormr,   fill_r%   r   r/   r.   r0   r1   r5   r4   initkaiming_normal_HubertModelrm   uniform_HubertForSequenceClassificationrn   num_hidden_layers)r:   moduler/   s      r=   _init_weightsz#HubertPreTrainedModel._init_weights   sP   fbii( MM&&CT[[5R5R&S{{&  &&( 'r||R^^ LMKK""$MM$$S)		*)+ 6:.76:3N"::FOOV__;]mn:o D//0B0BCD D #::6==XY:Z D//0B0BCD D ''(:(:;{{&  &&( ',v23((--668 4 ?@v/$$))//t{{7T7TWX7X0YZ 0 AD DD Ds   ?4L%#4L1%L.1L:input_lengthsc                     d }t        | j                  j                  | j                  j                        D ]  \  }} ||||      } |S )zH
        Computes the output length of the convolutional layers
        c                 >    t        j                  | |z
  |d      dz   S )Nfloor)rounding_moder   )torchdiv)input_lengthr   strides      r=   _conv_out_lengthzPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length   s"     99\K7wWZ[[[rC   )zipr;   conv_kernelconv_stride)r:   r   r   r   r   s        r=    _get_feat_extract_output_lengthsz6HubertPreTrainedModel._get_feat_extract_output_lengths   sQ    
	\
 $'t{{'>'>@W@W#X 	QK,]KPM	Q rC   feature_vector_lengthattention_maskc                    | j                  |j                  d            j                  t        j                        }|j
                  d   }t        j                  ||f|j                  |j                        }d|t        j                  |j
                  d   |j                        |dz
  f<   |j                  dg      j                  d      j                  dg      j                         }|S )NrS   r   )dtypedevicer   )r   )r   sumtor   longshapezerosr   r   arangeflipcumsumbool)r:   r   r   output_lengths
batch_sizes        r=   "_get_feature_vector_attention_maskz8HubertPreTrainedModel._get_feature_vector_attention_mask   s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
./~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrC   N)rE   rF   rG   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr~   r   r   
LongTensorintr   r   rL   rC   r=   rf   rf   ~   sh     $O&*#N[BeEDTDTVYDY>Z 
 
]b]m]m 
rC   rf   c                        e Zd Zdef fdZd Zd Z	 	 	 	 	 ddeej                     deej                     deej                     dee   d	ee   d
ee   deeef   fdZ xZS )ry   r;   c                    t         |   |       || _        t        |      | _        t        |      | _        |j                  dkD  s|j                  dkD  rEt        j                  t        j                  |j                        j                               | _        |j                   rt#        |      | _        nt'        |      | _        | j)                          | `y )Nrj   )r"   r#   r;   rO   feature_extractorrQ   feature_projectionmask_time_probmask_feature_probr$   	Parameterr   Tensorr&   rz   rm   do_stable_layer_normrd   encoderrb   	post_initadapterr_   s     r=   r#   zHubertModel.__init__   s     !5f!="9&"A  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	LrC   c                     t        d      NzNot needed for HubertAttributeErrorr:   s    r=   freeze_feature_extractorz$HubertModel.freeze_feature_extractor       455rC   c                     t        d      r   r   r   s    r=   freeze_feature_encoderz"HubertModel.freeze_feature_encoder   r   rC   rh   r   mask_time_indicesoutput_attentionsoutput_hidden_statesreturn_dictreturnc                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  |      }|j                  dd      }|| j                  |j                  d   |      }| j                  |      }| j                  ||      }| j                  |||||      }	|	d   }|s	|f|	dd z   S t        ||	j                  |	j                        S )a1  
        mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
            masked extracted features in *config.proj_codevector_dim* space.

        Example:

        ```python
        >>> from transformers import AutoProcessor, HubertModel
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
        >>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


        >>> def map_to_array(example):
        ...     example["speech"] = example["audio"]["array"]
        ...     return example


        >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
        >>> ds = ds.map(map_to_array)

        >>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
        >>> hidden_states = model(input_values).last_hidden_state
        ```Nr   r   )r   )r   r   r   r   r   )last_hidden_staterA   
attentions)r;   r   r   use_return_dictr   r?   r   r   r   _mask_hidden_statesr   r   rA   r   )
r:   rh   r   r   r   r   r   extract_featuresrA   encoder_outputss
             r=   rB   zHubertModel.forward   s,   F 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;%!DDEUE[E[\]E^`noN//0@A00Rc0d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rC   )NNNNN)rE   rF   rG   r   r#   r   r   r   r   r   FloatTensorr   r   tupler   rB   rH   rI   s   @r=   ry   ry      s    | &66 269=,0/3&*D
u||,D
 !.D
 $E$5$56	D

 $D>D
 'tnD
 d^D
 
uo%	&D
rC   ry   c                       e Zd Zy)HubertForCTCNrK   rL   rC   r=   r   r   &  rM   rC   r   c                       e Zd Zy)r{   NrK   rL   rC   r=   r{   r{   *  rM   rC   r{   )r   r{   ry   rf   )(__doc__typingr   r   r   torch.nnr$   activationsr   integrations.deepspeedr   modeling_outputsr   modeling_utilsr	   r-   r
   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   configuration_hubertr   _HIDDEN_STATES_START_POSITIONModuler   r7   rO   rQ   rb   rd   rf   ry   r   r{   __all__rL   rC   r=   <module>r      s     "   ! @ / - #   / !" /BII /d	- 		1 	bii $	O 		#A 	 CO C CL^
-!6 ^
B	> 		&G 	 frC   