
    rhI                        d Z ddlZddlZddlmZmZ ddlZddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZm Z  ddl!m"Z" dZ# G d de      Z$ G d de      Z% G d de      Z& G d dejN                        Z( G d de      Z) G d dejN                        Z* G d de      Z+ G d de+      Z, G d  d!e      Z- G d" d#e      Z. G d$ d%e      Z/ G d& d'ejN                        Z0e G d( d)e             Z1e G d* d+e1             Z2 G d, d-e      Z3 G d. d/e      Z4g d0Z5y)1zPyTorch SEW model.    N)OptionalUnion)nn   )ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutput)PreTrainedModel)auto_docstring   )Wav2Vec2AttentionWav2Vec2EncoderLayerWav2Vec2FeatureEncoderWav2Vec2FeedForwardWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2GroupNormConvLayerWav2Vec2LayerNormConvLayerWav2Vec2NoLayerNormConvLayerWav2Vec2SamePadLayer_compute_mask_indices   )	SEWConfigc                       e Zd Zy)SEWNoLayerNormConvLayerN__name__
__module____qualname__     v/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/sew/modular_sew.pyr   r   2       r"   r   c                       e Zd Zy)SEWLayerNormConvLayerNr   r!   r"   r#   r&   r&   6   r$   r"   r&   c                       e Zd Zy)SEWGroupNormConvLayerNr   r!   r"   r#   r(   r(   :   r$   r"   r(   c                   $     e Zd Z fdZd Z xZS )SEWPositionalConvEmbeddingc                    t         |           t        j                  |j                  |j                  |j
                  |j
                  dz  |j                  |j                        | _        t        j                  j                  }t        t        j                  j                  d      r$t        j                  j                  j                  }t               r(dd l}|j                  j!                  | j                  j"                  d      5   || j                  dd      | _        d d d        t        | j                  d      rU| j                  j                  j"                  j$                  }| j                  j                  j"                  j&                  }n,| j                  j(                  }| j                  j*                  }|j                  j-                  | |       |j                  j-                  | |       n || j                  dd      | _        t/        |j
                        | _        t2        |j4                     | _        y # 1 sw Y   'xY w)	Nr   )kernel_sizepaddinggroupsstrideweight_normr   modifier_rankweight)namedimparametrizations)super__init__r   Conv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupssqueeze_factorconvutilsr0   hasattrr6   r   	deepspeedzeroGatheredParametersr3   	original0	original1weight_gweight_vregister_external_parameterSEWSamePadLayerr-   r   feat_extract_activation
activation)selfconfigr0   rA   rF   rG   	__class__s         r#   r8   z#SEWPositionalConvEmbedding.__init__?   s   II6622a777((
	 hh**288,,m<((33??K%'224993C3CST2U I'		aH	Ityy"459955<<FF9955<<FF99--99--NN66tXFNN66tXF#DIIH!DDI&v'E'EF !?!?@I Is   IIc                 l    | j                  |      }| j                  |      }| j                  |      }|S N)r>   r-   rK   )rL   hidden_statess     r#   forwardz"SEWPositionalConvEmbedding.forwarda   s2    		-0]36r"   r   r   r    r8   rR   __classcell__rN   s   @r#   r*   r*   >   s     ADr"   r*   c                       e Zd Zy)rI   Nr   r!   r"   r#   rI   rI   i   r$   r"   rI   c                   $     e Zd Z fdZd Z xZS )SEWUpsamplingc                     t         |           t        j                  |j                  |j                  |j
                  z        | _        t        |j                     | _	        |j
                  | _        y rP   )
r7   r8   r   Linearr:   r=   
projectionr   rJ   rK   rL   rM   rN   s     r#   r8   zSEWUpsampling.__init__n   sW    ))F$6$68J8JVMbMb8bc !?!?@$33r"   c                 .   | j                  |      }| j                  |      }| j                  dkD  rc|j                         \  }}}|| j                  z  }|| j                  z  }|j	                  ||| j                  |      }|j	                  |||      }|S )Nr   )r[   rK   r=   sizereshape)rL   rQ   bszsrc_lensrc_embed_dimtgt_lentgt_embed_dims          r#   rR   zSEWUpsampling.forwardt   s    66"*7*<*<*>'C- 3 33G)T-@-@@M)11#w@S@SUbcM)11#wNMr"   rS   rU   s   @r#   rX   rX   m   s    4r"   rX   c                       e Zd Zy)SEWFeatureEncoderNr   r!   r"   r#   rf   rf      r$   r"   rf   c                        e Zd Z fdZ xZS )SEWFeatureExtractorc                     t         |   |       t        j                  d| j                  j
                   d| j                  j                  d   j
                   dt               y )NzThe class `zD` has been depreciated and will be removed in Transformers v5. Use `r   z
` instead.)r7   r8   warningswarnrN   r   	__bases__FutureWarningr\   s     r#   r8   zSEWFeatureExtractor.__init__   s[     $..112 3NN,,Q/889E 		
r"   )r   r   r    r8   rT   rU   s   @r#   rh   rh      s    
 
r"   rh   c                       e Zd Zy)SEWAttentionNr   r!   r"   r#   ro   ro      r$   r"   ro   c                       e Zd Zy)SEWFeedForwardNr   r!   r"   r#   rq   rq      r$   r"   rq   c                       e Zd Zy)SEWEncoderLayerNr   r!   r"   r#   rs   rs      r$   r"   rs   c                   .     e Zd Z fdZ	 	 	 	 ddZ xZS )
SEWEncoderc                    t         |           || _        t        |      | _        t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _        t        j                   t#        |j$                        D cg c]  }t'        |       c}      | _        t+        |      | _        d| _        y c c}w )NepsF)r7   r8   rM   r*   pos_conv_embedr   	AvgPool1dr=   pool	LayerNormr:   layer_norm_eps
layer_normDropouthidden_dropoutdropout
ModuleListrangenum_hidden_layersrs   layersrX   upsamplegradient_checkpointing)rL   rM   _rN   s      r#   r8   zSEWEncoder.__init__   s    8@LL!6!68M8MN	,,v'9'9v?T?TUzz&"7"78mmeFLdLdFe$f_V%<$fg%f-&+# %gs   Dc           	      2   |rdnd }|rdnd }||j                  d      j                  dd|j                  d         }| j                  j                  dk(  rd|| <   |d|v r|nd }ngd|| <   |j                         j                  d      }	|	| j                  j                  z  }
|j                  d   | j                  j                  z  }t        j                  d||
j                        j                  dd      j                  |
j                  d   d      }||
j                  dd      k  j                         }d	|d d d d d d f   j                  |j                  
      z
  }|t        j                  |j                        j                   z  }|j                  |j                  d   d|j                  d   |j                  d         }|j                  d   }|j#                  dd      }| j%                  |      }| j'                  |      }t!        |j)                  d      |j)                  d            }|dd |f   |dd |f   z   }|j#                  dd      }| j+                  |      }| j-                  |      }t/               xs t1        |       }| j2                  D ]j  }|r||fz   }t        j4                  g       }| j6                  xr || j                  j8                  k  }|r|r ||||      }|d   }|rd}|sb|d   fz   }l |r||fz   }| j;                  |      }|j                  d   |k  r4t<        j>                  jA                  |ddd||j                  d   z
  f      }|stC        d |||fD              S tE        |||      S )Nr!   r   r   flash_attention_2        r   device      ?)dtype.)attention_maskoutput_attentionsNNc              3   &   K   | ]	  }||  y wrP   r!   ).0vs     r#   	<genexpr>z%SEWEncoder.forward.<locals>.<genexpr>   s     mq_`_lms   last_hidden_staterQ   
attentions)#	unsqueezerepeatshaperM   _attn_implementationlongsumr=   torcharanger   viewexpandtor   finfomin	transposery   r{   r^   r~   r   r   r	   r   randtraining	layerdropr   r   
functionalpadtupler
   )rL   rQ   r   r   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskinput_lengthsoutput_lengthsmax_encoder_lengthattention_idsn_input_timestepsposition_embeddingspooled_hidden_states
min_lengthsynced_gpuslayerdropout_probabilityskip_the_layerlayer_outputss                         r#   rR   zSEWEncoder.forward   s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!{{//3FF8;4454B4NSTXfSfmq 9<445!/!4!4!6 ; ;B ?!.$++2L2L!L%2%8%8%;t{{?Y?Y%Y"LL$6~?T?TUT!R[VN003R8 
 #0.2E2Eb!2L"L!R!R!T "%~atQ6F'G'J'JQ^QdQd'J'e!e!/%++m>Q>Q2R2V2V!V!/!6!6"((+Q0D0DR0H.J^J^_aJb" *//2%//15"11-@#yy7,11"57K7P7PQS7TU
,S+:+-=>ATUXZe[eZeUeAff%//156]302R6LT6R[[ 	PE#$58H$H! #(**R.!]]Z/BT[[EZEZ/ZN![ %!.Te! !.a 0 , &9]1=M<O&O#'	P*   1]4D Dm4q!$55MM--maAGX[h[n[nop[qGq=rsMm]4EGZ$[mmm++*
 	
r"   )NFFTrS   rU   s   @r#   ru   ru      s    	, "W
r"   ru   c                       e Zd ZU eed<   dZdZdZdZdZ	dZ
d Zdeej                  ef   fdZd	ed
ej                  fdZy)SEWPreTrainedModelrM   sewinput_valuesTFc           
         t        |t              rt        j                  j	                  |j
                  j                  ddt        j                  d|j
                  j                  d   |j
                  j                  z  z        z         t        j                  j                  |j
                  j                  d       nt        |t        j                        r=|j                  j                  j	                  d| j                  j                          nt        |t        j"                  t        j$                  f      rK|j                  j                  j'                          |j                  j                  j)                  d       nHt        |t        j*                        r-t-               rddl}t1        |d      r|t1        |d	      rp|j2                  j5                  |j6                  |j8                  gd
      5  t        j                  j;                  |j                  j                         ddd       n|j2                  j5                  |j                  d
      5  t        j                  j;                  |j                  j                         ddd       n3t        j                  j;                  |j                  j                         t        |t        j                  t        j*                  f      r2|j                  %|j                  j                  j'                          yyy# 1 sw Y   fxY w# 1 sw Y   rxY w)zInitialize the weightsr   r   r   )meanstdr   r   NrG   rF   r1   )
isinstancer*   r   initnormal_r>   r3   mathsqrtr,   in_channels	constant_biasrZ   datarM   initializer_ranger|   	GroupNormzero_fill_r9   r   rA   r@   rB   rC   rG   rF   kaiming_normal_)rL   modulerA   s      r#   _init_weightsz SEWPreTrainedModel._init_weights  sB   f89GGOO""		!v{{'>'>q'AFKKD[D['["\]]  
 GGfkk..2		* MM&&CT[[5R5R&Sr|| <=KK""$MM$$S)		*)+ 6:.76:3N"::FOOV__;]mn:o D//0B0BCD D #::6==XY:Z D//0B0BCD D ''(:(:;fryy"))45&++:QKK""$ ;R5D DD Ds   4L5(4M5L>M
r   c                     d }t        | j                  j                  | j                  j                        D ]  \  }} ||||      } |S )zH
        Computes the output length of the convolutional layers
        c                 >    t        j                  | |z
  |d      dz   S )Nfloor)rounding_moder   )r   div)input_lengthr,   r/   s      r#   _conv_out_lengthzMSEWPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length3  s"     99\K7wWZ[[[r"   )ziprM   conv_kernelconv_stride)rL   r   r   r,   r/   s        r#    _get_feat_extract_output_lengthsz3SEWPreTrainedModel._get_feat_extract_output_lengths.  sQ    
	\
 $'t{{'>'>@W@W#X 	QK,]KPM	Q r"   feature_vector_lengthr   c                    | j                  |j                  d            j                  t        j                        }|j
                  d   }t        j                  ||f|j                  |j                        }d|t        j                  |j
                  d   |j                        |dz
  f<   |j                  dg      j                  d      j                  dg      j                         }|S )Nr   r   )r   r   r   r   )r   r   r   r   r   r   zerosr   r   r   flipcumsumbool)rL   r   r   r   
batch_sizes        r#   "_get_feature_vector_attention_maskz5SEWPreTrainedModel._get_feature_vector_attention_mask=  s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
./~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOr"   N)r   r   r    r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   r   r   
LongTensorintr   r   r!   r"   r#   r   r     sg    $O&*#N%@eEDTDTVYDY>Z 
 
]b]m]m 
r"   r   c                   *    e Zd Zdef fdZ	 	 ddej                  deej                     deej                     fdZ	e
	 	 	 	 	 ddeej                     deej                     deej                     dee   d	ee   d
ee   deeef   fd       Z xZS )SEWModelrM   c                    t         |   |       || _        t        |      | _        t        j                  |j                  d   |j                        | _	        |j                  d   |j                  k7  | _        | j                  r2t        j                  |j                  d   |j                        | _        t        j                  |j                        | _        |j"                  dkD  s|j$                  dkD  rEt        j&                  t)        j*                  |j                        j-                               | _        t1        |      | _        | j5                          y )Nr   rw   r   )r7   r8   rM   rf   feature_extractorr   r|   conv_dimr}   r~   r:   project_featuresrZ   feature_projectionr   feat_proj_dropoutfeature_dropoutmask_time_probmask_feature_prob	Parameterr   Tensoruniform_masked_spec_embedru   encoder	post_initr\   s     r#   r8   zSEWModel.__init__L  s     !26!:,,vr':@U@UV & 3v7I7I I  &(ii0CVEWEW&XD#!zz&*B*BC  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"!&) 	r"   rQ   mask_time_indicesr   c                    t        | j                  dd      s|S |j                         \  }}}|)| j                  j	                  |j
                        ||<   n| j                  j                  dkD  r| j                  rt        ||f| j                  j                  | j                  j                  || j                  j                        }t        j                  ||j                  t        j                        }| j                  j	                  |j
                        ||<   | j                  j                  dkD  r| j                  rt        ||f| j                  j                  | j                  j                   | j                  j"                        }t        j                  ||j                  t        j                        }|dddf   j%                  d|d      }d||<   |S )	z
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://huggingface.co/papers/1904.08779).
        apply_spec_augmentTNr   )	mask_probmask_lengthr   	min_masks)r   r   )r  r	  r
  r   )getattrrM   r^   r  r   r   r   r   r   mask_time_lengthmask_time_min_masksr   tensorr   r   r   mask_feature_lengthmask_feature_min_masksr   )rL   rQ   r  r   r   sequence_lengthr:   mask_feature_indicess           r#   _mask_hidden_stateszSEWModel._mask_hidden_states`  s    t{{$8$?   4A3E3E3G0
O[(/3/E/E/H/HI\I\/]M+,[[''!+ 5_-++44 KK88-++99! !&->}G[G[chcmcm n/3/E/E/H/HI\I\/]M+,;;((1,#8[)++77 KK;;++<<	$  $)<<0D]MaMainisis#t #74#@#G#GO]_#` 23M./r"   r   r   r   r   returnc                 Z   ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j	                  |      }|j                  dd      }| j                  |      }| j                  r| j                  |      }| j                  |      }|| j                  |j                  d   |      }| j                  ||      }| j                  |||||      }	|	d   }|s	|f|	dd z   S t        ||	j                  |	j                         S )a/  
        mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
            masked extracted features in *config.proj_codevector_dim* space.
        Nr   r   )r  )r   r   r   r   r   r   )rM   r   r   use_return_dictr   r   r~   r   r   r   r   r   r  r  r
   rQ   r   )
rL   r   r   r  r   r   r   extract_featuresrQ   encoder_outputss
             r#   rR   zSEWModel.forward  sU    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]11,?+55a;??+;<  #667GH,,-=>%!DD]EXEXYZE[]klN00Rc0d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
r"   r   )NNNNN)r   r   r    r   r8   r   FloatTensorr   r   r  r   r   r   r   r   r
   rR   rT   rU   s   @r#   r   r   J  s    y . :>59	,((, $E$5$56, !!1!12	,\  269=,0/3&*3
u||,3
 !.3
 $E$5$56	3

 $D>3
 'tn3
 d^3
 
uo%	&3
 3
r"   r   c                       e Zd Zy)	SEWForCTCNr   r!   r"   r#   r  r    r$   r"   r  c                       e Zd Zy)SEWForSequenceClassificationNr   r!   r"   r#   r  r    r$   r"   r  )r  r  r   r   )6__doc__r   rj   typingr   r   r   torch.utils.checkpointr   activationsr   integrations.deepspeedr   integrations.fsdpr	   modeling_outputsr
   modeling_utilsr   r?   r   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   r   r   configuration_sewr   _HIDDEN_STATES_START_POSITIONr   r&   r(   Moduler*   rI   rX   rf   rh   ro   rq   rs   ru   r   r   r  r  __all__r!   r"   r#   <module>r+     sX      "    ! @ 7 / - #    ) !" 	: 		6 		6 	( (V	* 	BII ,	. 	
+ 
	$ 		( 		* 	c
 c
L B B BJ w
! w
 w
t	 		#D 	 Zr"   