Ë
    ¹rœhL ã                   óÆ  — d Z ddlZddlmZ ddlmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlmZ  ej<                  e«      Z  G d„ de
jB                  «      Z" G d„ de
jB                  «      Z# G d„ de
jB                  «      Z$ G d„ de
jB                  «      Z% G d„ de
jB                  «      Z& G d„ de
jB                  «      Z'	 	 	 dyde
jB                  dejP                  dejP                  dejP                  deejP                     d ee)   d!e)d"eejP                     fd#„Z* G d$„ d%e
jB                  «      Z+ G d&„ d'e
jB                  «      Z, G d(„ d)e
jB                  «      Z- G d*„ d+e
jB                  «      Z. G d,„ d-e
jB                  «      Z/ G d.„ d/e
jB                  «      Z0 G d0„ d1e
jB                  «      Z1e G d2„ d3e«      «       Z2 G d4„ d5e
jB                  «      Z3	 	 	 dzd6ejP                  d7e)d8ee4   d9e5d:e6f
d;„Z7	 	 d{d6ejP                  d<ee4e6f   d8ee4   d:e6fd=„Z8 G d>„ d?e
jB                  «      Z9 G d@„ dAe
jB                  «      Z: G dB„ dCe
jB                  «      Z; G dD„ dEe
jB                  «      Z< G dF„ dGe
jB                  «      Z=e edH¬I«       G dJ„ dKe«      «       «       Z> G dL„ dMe2«      Z?e edN¬I«       G dO„ dPe«      «       «       Z@ edQ¬I«       G dR„ dSe2«      «       ZAe edT¬I«       G dU„ dVe«      «       «       ZB edW¬I«       G dX„ dYe2«      «       ZCe edZ¬I«       G d[„ d\e«      «       «       ZDe ed]¬I«       G d^„ d_e«      «       «       ZEe ed]¬I«       G d`„ dae«      «       «       ZFdbejŽ                  j                  dcejP                  ddejP                  fde„ZId|dfejP                  dgeejP                     ddejP                  fdh„ZJ G di„ dje2«      ZKe edk¬I«       G dl„ dme«      «       «       ZL G dn„ doe2«      ZMe edp¬I«       G dq„ dre«      «       «       ZN G ds„ dte
jB                  «      ZO edu¬I«       G dv„ dwe2«      «       ZPg dx¢ZQy)}zPyTorch PatchTSMixer model.é    N)Ú	dataclass)ÚCallableÚOptionalÚUnion)ÚPreTrainedModel)ÚModelOutputé   )ÚFlashAttentionKwargs)ÚALL_ATTENTION_FUNCTIONS)ÚUnpack)ÚNegativeBinomialOutputÚNormalOutputÚStudentTOutput)Úauto_docstringÚloggingé   )ÚPatchTSMixerConfigc                   ó2   ‡ — e Zd ZdZdedefˆ fd„Zd„ Zˆ xZS )ÚPatchTSMixerGatedAttentionz›
    Module that applies gated attention to input data.

    Args:
        in_size (`int`): The input size.
        out_size (`int`): The output size.
    Úin_sizeÚout_sizec                 óŽ   •— t         ‰|   «        t        j                  ||«      | _        t        j
                  d¬«      | _        y )Néÿÿÿÿ©Údim)ÚsuperÚ__init__ÚnnÚLinearÚ
attn_layerÚSoftmaxÚattn_softmax)Úselfr   r   Ú	__class__s      €ú‰/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/patchtsmixer/modeling_patchtsmixer.pyr   z#PatchTSMixerGatedAttention.__init__/   s1   ø€ Ü‰ÑÔÜŸ)™) G¨XÓ6ˆŒÜŸJ™J¨2Ô.ˆÕó    c                 óP   — | j                  | j                  |«      «      }||z  }|S ©N)r"   r    )r#   ÚinputsÚattn_weights      r%   Úforwardz"PatchTSMixerGatedAttention.forward4   s*   € Ø×'Ñ'¨¯©¸Ó(?Ó@ˆØ˜+Ñ%ˆØˆr&   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úintr   r+   Ú__classcell__©r$   s   @r%   r   r   &   s    ø„ ñð/ ð /¨sõ /ö
r&   r   c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚPatchTSMixerBatchNormzP
    Compute batch normalization over the sequence length (time) dimension.
    Úconfigc                 ó‚   •— t         ‰|   «        t        j                  |j                  |j
                  ¬«      | _        y )N©Úeps)r   r   r   ÚBatchNorm1dÚd_modelÚnorm_epsÚ	batchnorm©r#   r5   r$   s     €r%   r   zPatchTSMixerBatchNorm.__init__@   s(   ø€ Ü‰ÑÔÜŸ™¨¯©¸F¿O¹OÔLˆr&   r)   c                 ól   — |j                  dd«      }| j                  |«      }|j                  dd«      S )a  
        Parameters:
            inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
                input for Batch norm calculation
        Returns:
            `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
        r   é   )Ú	transposer<   )r#   r)   Úoutputs      r%   r+   zPatchTSMixerBatchNorm.forwardD   s7   € ð ×!Ñ! ! QÓ'ˆØ—‘ Ó'ˆØ×Ñ  1Ó%Ð%r&   ©
r,   r-   r.   r/   r   r   ÚtorchÚTensorr+   r1   r2   s   @r%   r4   r4   ;   s'   ø„ ñðMÐ1õ Mð
&˜eŸl™l÷ 
&r&   r4   c                   óv   ‡ — e Zd ZdZdefˆ fd„Zededej                  fd„«       Z	de
j                  fd„Zˆ xZS )ÚPatchTSMixerPositionalEncodingz'
    Class for positional encoding
    r5   c                 óì   •— t         ‰|   «        |j                  r| j                  |«      | _        y t        j                  t        j                  |j                  |j                  «      «      | _        y r(   )r   r   Úuse_positional_encodingÚ_init_peÚposition_encr   Ú	ParameterrC   ÚzerosÚnum_patchesr:   r=   s     €r%   r   z'PatchTSMixerPositionalEncoding.__init__V   sN   ø€ Ü‰ÑÔà×)Ò)Ø $§¡¨fÓ 5ˆDÕä "§¡¬U¯[©[¸×9KÑ9KÈVÏ^É^Ó-\Ó ]ˆDÕr&   Úreturnc                 ó`  — | j                   dk(  rAt        j                  t        j                  | j
                  | j                  «      d¬«      }|S | j                   dk(  r7t        j                  | j
                  | j                  «      }t        j                  d| j
                  «      j                  d«      }t        j                  t        j                  d| j                  d«      t        j                  d«      | j                  z   z  «      }t        j                  ||z  «      |d d …dd d…f<   t        j                  ||z  «      |d d …dd d…f<   ||j                  «       z
  }||j!                  «       d	z  z  }t        j                  |d
¬«      }|S t#        | j                   › d«      ‚)NÚrandomT©Úrequires_gradÚsincosr   r   r?   g     ˆÃ@é
   FzN is not a valid positional encoder. Available types are 'random' and 'sincos'.)Úpositional_encoding_typer   rK   rC   ÚrandnrM   r:   rL   ÚarangeÚ	unsqueezeÚexpÚmathÚlogÚsinÚcosÚmeanÚstdÚ
ValueError)r5   rJ   ÚpositionÚdiv_terms       r%   rI   z'PatchTSMixerPositionalEncoding._init_pe^   sv  € ð ×*Ñ*¨hÒ6ÜŸ<™<¬¯©°F×4FÑ4FÈÏÉÓ(WÐgkÔlˆLð Ðð ×,Ñ,°Ó8Ü Ÿ;™; v×'9Ñ'9¸6¿>¹>ÓJˆLÜ—|‘| A v×'9Ñ'9Ó:×DÑDÀQÓGˆHÜ—y‘y¤§¡¨a°·±ÀÓ!CÌÏÉÐQXÓHYÐ\b×\jÑ\jÑHjÐFkÑ!kÓlˆHÜ$)§I¡I¨h¸Ñ.AÓ$BˆLš˜A˜D˜q˜D˜Ñ!Ü$)§I¡I¨h¸Ñ.AÓ$BˆLš˜A˜D˜q˜D˜Ñ!Ø'¨,×*;Ñ*;Ó*=Ñ=ˆLØ'¨<×+;Ñ+;Ó+=ÀÑ+BÑCˆLÜŸ<™<¨ÀEÔJˆLð
 Ðô Ø×2Ñ2Ð3ð  4Bð  Cóð r&   Úpatch_inputc                 ó$   — || j                   z   }|S r(   )rJ   )r#   rc   Úhidden_states      r%   r+   z&PatchTSMixerPositionalEncoding.forwardr   s   € à" T×%6Ñ%6Ñ6ˆØÐr&   )r,   r-   r.   r/   r   r   Ústaticmethodr   rK   rI   rC   rD   r+   r1   r2   s   @r%   rF   rF   Q   sN   ø„ ñð^Ð1õ ^ð ðÐ+ð °·±ò ó ðð& 5§<¡<÷ r&   rF   c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚPatchTSMixerNormLayerzeNormalization block

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óþ   •— t         ‰|   «        |j                  | _        d|j                  j                  «       v rt	        |«      | _        y t        j                  |j                  |j                  ¬«      | _        y )NÚbatchr7   )
r   r   Únorm_mlpÚlowerr4   Únormr   Ú	LayerNormr:   r;   r=   s     €r%   r   zPatchTSMixerNormLayer.__init__€   sT   ø€ Ü‰ÑÔàŸ™ˆŒàf—o‘o×+Ñ+Ó-Ñ-Ü-¨fÓ5ˆDIäŸ™ V§^¡^¸¿¹ÔIˆDIr&   r)   c                 óf  — d| j                   j                  «       v rƒt        j                  ||j                  d   |j                  d   z  |j                  d   |j                  d   f«      }| j                  |«      }t        j                  ||j                  «      }|S | j                  |«      }|S )a  
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                Input to the normalization layer.
        Returns:
            `torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`
        rj   r   r   r?   r	   )rk   rl   rC   ÚreshapeÚshaperm   )r#   r)   Úinputs_reshapeds      r%   r+   zPatchTSMixerNormLayer.forwardŠ   sŸ   € ð d—m‘m×)Ñ)Ó+Ñ+ä#Ÿm™mØà—L‘L ‘O f§l¡l°1¡oÑ5Ø—L‘L ‘OØ—L‘L ‘OðóˆOð #Ÿi™i¨Ó8ˆOô —]‘] ?°F·L±LÓAˆFð
 ˆð —Y‘Y˜vÓ&ˆFàˆr&   rB   r2   s   @r%   rh   rh   x   s'   ø„ ñðJÐ1õ Jð˜eŸl™l÷ r&   rh   c                   ó>   ‡ — e Zd Zˆ fd„Zdej
                  fd„Zˆ xZS )ÚPatchTSMixerMLPc                 ó<  •— t         ‰|   «        ||j                  z  }t        j                  ||«      | _        t        j                  |j                  «      | _        t        j                  ||«      | _	        t        j                  |j                  «      | _
        y r(   )r   r   Úexpansion_factorr   r   Úfc1ÚDropoutÚdropoutÚdropout1Úfc2Údropout2)r#   Úin_featuresÚout_featuresr5   Ú
num_hiddenr$   s        €r%   r   zPatchTSMixerMLP.__init__ª   sj   ø€ Ü‰ÑÔØ  6×#:Ñ#:Ñ:ˆ
Ü—9‘9˜[¨*Ó5ˆŒÜŸ
™
 6§>¡>Ó2ˆŒÜ—9‘9˜Z¨Ó6ˆŒÜŸ
™
 6§>¡>Ó2ˆr&   r)   c                 óÄ   — | j                  t        j                  j                  | j	                  |«      «      «      }| j                  |«      }| j                  |«      }|S )zì
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                Input to the MLP layer.
        Returns:
            `torch.Tensor` of the same shape as `inputs`
        )rz   r   Ú
functionalÚgelurw   r{   r|   )r#   r)   s     r%   r+   zPatchTSMixerMLP.forward²   sK   € ð —‘œrŸ}™}×1Ñ1°$·(±(¸6Ó2BÓCÓDˆØ—‘˜&Ó!ˆØ—‘˜vÓ&ˆØˆr&   )r,   r-   r.   r   rC   rD   r+   r1   r2   s   @r%   rt   rt   ©   s   ø„ ô3ð˜eŸl™l÷ r&   rt   c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )Ú$PatchTSMixerChannelFeatureMixerBlockzŠThis module mixes the features in the channel dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 ó  •— t         ‰|   «        t        |«      | _        |j                  | _        t        |j                  |j                  |¬«      | _        |j                  r't        |j                  |j                  ¬«      | _	        y y ©N©r}   r~   r5   ©r   r   )
r   r   rh   rm   Ú
gated_attnrt   Únum_input_channelsÚmlpr   Úgating_blockr=   s     €r%   r   z-PatchTSMixerChannelFeatureMixerBlock.__init__È   sv   ø€ Ü‰ÑÔä)¨&Ó1ˆŒ	Ø ×+Ñ+ˆŒÜ"Ø×1Ñ1Ø×2Ñ2Øô
ˆŒð ×ÒÜ :Ø×1Ñ1¸F×<UÑ<Uô!ˆDÕð r&   r)   c                 óâ   — |}| j                  |«      }|j                  dddd«      }| j                  r| j                  |«      }| j	                  |«      }|j                  dddd«      }||z   }|S )zë
        Args:
            inputs (`torch.Tensor` of shape `((batch_size, num_channels, num_patches, d_model))`):
                input to the MLP layer
        Returns:
            `torch.Tensor` of the same shape as `inputs`
        r   r	   r?   r   )rm   Úpermuter‰   rŒ   r‹   )r#   r)   ÚresidualÚouts       r%   r+   z,PatchTSMixerChannelFeatureMixerBlock.forwardØ   ss   € ð ˆØ—‘˜6Ó"ˆà—‘  1 a¨Ó+ˆà?Š?Ø×&Ñ& vÓ.ˆFà—‘˜&Ó!ˆà—‘  1 a¨Ó+ˆàxÑˆØˆ
r&   rB   r2   s   @r%   r„   r„   À   s%   ø„ ñðÐ1õ ð ˜eŸl™l÷ r&   r„   ÚmoduleÚqueryÚkeyÚvalueÚattention_maskÚscalingry   Ú	head_maskc                 óÎ  — |€|j                  d«      dz  }t        j                  ||j                  dd«      «      |z  }	||	|z   }	t        j
                  j                  |	d¬«      }	||	|j                  dddd«      z  }	t        j
                  j                  |	|| j                  ¬«      }	t        j                  |	|«      }
|
j                  dd«      j                  «       }
|
|	fS )Nr   ç      à¿r?   r	   r   r   )ÚpÚtraining)ÚsizerC   Úmatmulr@   r   r   ÚsoftmaxÚviewry   r›   Ú
contiguous)r‘   r’   r“   r”   r•   r–   ry   r—   ÚkwargsÚattn_weightsÚattn_outputs              r%   Úeager_attention_forwardr¤   ñ   sØ   € ð €Ø—*‘*˜R“. DÑ(ˆä—<‘<  s§}¡}°Q¸Ó':Ó;¸gÑE€LØÐ!Ø# nÑ4ˆä—=‘=×(Ñ(¨¸2Ð(Ó>€LàÐØ# i§n¡n°Q¸¸A¸qÓ&AÑAˆä—=‘=×(Ñ(¨¸È6Ï?É?Ð(Ó[€LÜ—,‘,˜|¨UÓ3€KØ×'Ñ'¨¨1Ó-×8Ñ8Ó:€Kà˜Ð$Ð$r&   c                   óH  ‡ — e Zd ZdZ	 	 	 	 	 ddededededededee   fˆ fd	„Z		 	 	 	 dd
e
j                  dee
j                     dee
j                     dee
j                     dee   dee   dee
j                  ee
j                     eee
j                        f   fd„Zˆ xZS )ÚPatchTSMixerAttentionz=Multi-headed attention from 'Attention Is All You Need' paperÚ	embed_dimÚ	num_headsry   Ú
is_decoderÚbiasÚ	is_causalr5   c                 ó
  •— t         ‰|   «        || _        || _        || _        ||z  | _        || _        | j
                  |z  | j                  k7  rt        d| j                  › d|› d«      ‚| j
                  dz  | _        || _	        || _
        t        j                  |||¬«      | _        t        j                  |||¬«      | _        t        j                  |||¬«      | _        t        j                  |||¬«      | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ú).r™   )rª   )r   r   r§   r¨   ry   Úhead_dimr5   r`   r–   r©   r«   r   r   Úk_projÚv_projÚq_projÚout_proj)	r#   r§   r¨   ry   r©   rª   r«   r5   r$   s	           €r%   r   zPatchTSMixerAttention.__init__  sä   ø€ ô 	‰ÑÔØ"ˆŒØ"ˆŒØˆŒØ! YÑ.ˆŒØˆŒàM‰M˜IÑ%¨$¯.©.Ò8ÜØMÈdÏnÉnÐM]Ø$ Y K¨rð3óð ð —}‘} dÑ*ˆŒØ$ˆŒØ"ˆŒä—i‘i 	¨9¸4Ô@ˆŒÜ—i‘i 	¨9¸4Ô@ˆŒÜ—i‘i 	¨9¸4Ô@ˆŒÜŸ	™	 )¨Y¸TÔBˆr&   Úhidden_statesÚkey_value_statesr•   Úlayer_head_maskÚoutput_attentionsr¡   rN   c                 ó  — |du}|j                   dd \  }}	|r|j                   d   n|	}
||	d| j                  f}||
d| j                  f} | j                  |«      j                  |Ž j	                  dd«      }|r|n|} | j                  |«      j                  |Ž j	                  dd«      } | j                  |«      j                  |Ž j	                  dd«      }t        }| j                  j                  dk7  rt        | j                  j                     } || ||||f| j                  sdn| j                  | j                  ||dœ|¤Ž\  }}|j                  ||	d«      j                  «       }| j!                  |«      }||dfS )z#Input shape: Batch x Time x ChannelNr   r   r?   Úeagerç        )ry   r–   r¶   r—   )rq   r®   r±   rŸ   r@   r¯   r°   r¤   r5   Ú_attn_implementationr   r›   ry   r–   rp   r    r²   )r#   r³   r´   r•   rµ   r¶   r¡   Úis_cross_attentionÚbszÚtgt_lenÚsrc_lenÚq_input_shapeÚkv_input_shapeÚquery_statesÚcurrent_statesÚ
key_statesÚvalue_statesÚattention_interfacer£   r¢   s                       r%   r+   zPatchTSMixerAttention.forward2  sœ  € ð .°TÐ9Ðð %×*Ñ*¨3¨BÐ/‰ˆˆWÙ/AÐ"×(Ñ(¨Ò+Àwˆà˜g r¨4¯=©=Ð9ˆØ˜w¨¨D¯M©MÐ:ˆð 7t—{‘{ =Ó1×6Ñ6¸ÐF×PÑPÐQRÐTUÓVˆá-?Ñ)À]ˆØ5T—[‘[ Ó0×5Ñ5°~ÐF×PÑPÐQRÐTUÓVˆ
Ø7t—{‘{ >Ó2×7Ñ7¸ÐH×RÑRÐSTÐVWÓXˆä(?ÐØ;‰;×+Ñ+¨wÒ6Ü"9¸$¿+¹+×:ZÑ:ZÑ"[Ðá$7ØØØØØð%
ð  $Ÿ}š}‘C°$·,±,Ø—L‘LØ/Ø%ñ%
ð ñ%
Ñ!ˆ\ð "×)Ñ)¨#¨w¸Ó;×FÑFÓHˆØ—m‘m KÓ0ˆà˜L¨$Ð.Ð.r&   )r¹   FTFN)NNNF)r,   r-   r.   r/   r0   ÚfloatÚboolr   r   r   rC   rD   r   r
   Útupler+   r1   r2   s   @r%   r¦   r¦     s  ø„ ÙGð Ø ØØØ/3ñCàðCð ðCð ð	Cð
 ðCð ðCð ðCð Ð+Ñ,õCðD 48Ø15Ø26Ø,1ñ3/à—|‘|ð3/ð # 5§<¡<Ñ0ð3/ð ! §¡Ñ.ð	3/ð
 " %§,¡,Ñ/ð3/ð $ D™>ð3/ð Ð-Ñ.ð3/ð 
ˆu|‰|˜X e§l¡lÑ3°X¸eÀEÇLÁLÑ>QÑ5RÐRÑ	S÷3/r&   r¦   c                   ó.   ‡ — e Zd ZdZdefˆ fd„Zd„ Zˆ xZS )ÚPatchMixerBlockzxThis module mixes the patch dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óÖ  •— t         ‰|   «        t        |«      | _        |j                  | _        |j
                  | _        t        |j                  |j                  |¬«      | _        |j
                  r&t        |j                  |j                  ¬«      | _
        |j                  rCt        |j                  |j                  |j                  |¬«      | _        t        |«      | _        y y )Nr‡   rˆ   )r§   r¨   ry   r5   )r   r   rh   rm   Ú	self_attnr‰   rt   rM   r‹   r   rŒ   r¦   r:   Úself_attn_headsry   Úself_attn_layerÚ	norm_attnr=   s     €r%   r   zPatchMixerBlock.__init__p  s¿   ø€ Ü‰ÑÔä)¨&Ó1ˆŒ	à×)Ñ)ˆŒØ ×+Ñ+ˆŒä"Ø×*Ñ*Ø×+Ñ+Øô
ˆŒð ×ÒÜ :À6×CUÑCUÐ`f×`rÑ`rÔ sˆDÔà×ÒÜ#8Ø Ÿ.™.Ø ×0Ñ0ØŸ™Øô	$ˆDÔ ô 3°6Ó:ˆDNð r&   c                 óÖ  — |}| j                  |«      }| j                  rR|j                  \  }}}}|j                  ||z  ||«      }| j	                  |d¬«      \  }}	}	|j                  ||||«      }|j                  dd«      }| j                  |«      }| j                  r| j                  |«      }|j                  dd«      }| j                  r| j                  |z   «      }||z   }
|
S )z’
        Args:
            hidden_state (`torch.Tensor`): Input tensor.

        Returns:
            `torch.Tensor`: Transformed tensor.
        F)r¶   r?   r	   )
rm   rÌ   rq   rp   rÎ   r@   r‹   r‰   rŒ   rÏ   )r#   re   r   Ú
batch_sizeÚn_varsrM   r:   Úhidden_state_reshapedÚx_attnÚ_r   s              r%   r+   zPatchMixerBlock.forwardŠ  sô   € ð  ˆà—y‘y Ó.ˆà>Š>Ø7C×7IÑ7IÑ4ˆJ˜ ¨WØ$0×$8Ñ$8¸ÀfÑ9LÈkÐ[bÓ$cÐ!à×/Ñ/Ð0EÐY^Ð/Ó_‰LˆFAqØ—^‘^ J°¸ÀWÓMˆFð $×-Ñ-¨a°Ó3ˆØ—x‘x Ó-ˆà?Š?Ø×,Ñ,¨\Ó:ˆLð $×-Ñ-¨a°Ó3ˆà>Š>ØŸ>™>¨,¸Ñ*?Ó@ˆLà˜XÑ%ˆØˆ
r&   ©r,   r-   r.   r/   r   r   r+   r1   r2   s   @r%   rÊ   rÊ   h  s   ø„ ñð;Ð1õ ;ö4!r&   rÊ   c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚFeatureMixerBlockz‚This module mixes the hidden feature dimension.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    r5   c                 ó  •— t         ‰|   «        t        |«      | _        |j                  | _        t        |j                  |j                  |¬«      | _        |j                  r't        |j                  |j                  ¬«      | _	        y y r†   )
r   r   rh   rm   r‰   rt   r:   r‹   r   rŒ   r=   s     €r%   r   zFeatureMixerBlock.__init__·  sn   ø€ Ü‰ÑÔä)¨&Ó1ˆŒ	à ×+Ñ+ˆŒä"ØŸ™ØŸ™Øô
ˆŒð ×ÒÜ :À6Ç>Á>Ð\b×\jÑ\jÔ kˆDÕð r&   Úhiddenc                 ó’   — |}| j                  |«      }| j                  |«      }| j                  r| j                  |«      }||z   }|S )ú×
        Args:
            hidden (`torch.Tensor` of shape `(batch_size, num_patches, d_model)`):
                Input tensor to the layer.

        Returns:
            `torch.Tensor`: Transformed tensor.
        )rm   r‹   r‰   rŒ   )r#   rÚ   r   r   s       r%   r+   zFeatureMixerBlock.forwardÇ  sK   € ð ˆØ—‘˜6Ó"ˆØ—‘˜&Ó!ˆà?Š?Ø×&Ñ& vÓ.ˆFàxÑˆØˆ
r&   rB   r2   s   @r%   rØ   rØ   ®  s'   ø„ ñðlÐ1õ lð ˜eŸl™l÷ r&   rØ   c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚPatchTSMixerLayerz•
    The `PatchTSMixer` layer that does all three kinds of mixing.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    r5   c                 óÊ   •— t         ‰|   «        t        |¬«      | _        t	        |¬«      | _        |j                  | _        |j                  dk(  rt        |¬«      | _        y y )N©r5   Úmix_channel)	r   r   rÊ   Úpatch_mixerrØ   Úfeature_mixerÚmoder„   Úchannel_feature_mixerr=   s     €r%   r   zPatchTSMixerLayer.__init__å  sR   ø€ Ü‰ÑÔä*°&Ô9ˆÔÜ.°fÔ=ˆÔà—K‘KˆŒ	à;‰;˜-Ò'Ü)MÐU[Ô)\ˆDÕ&ð (r&   rÚ   c                 óŠ   — | j                   dk(  r| j                  |«      }| j                  |«      }| j                  |«      }|S )rÜ   rá   )rä   rå   râ   rã   )r#   rÚ   s     r%   r+   zPatchTSMixerLayer.forwardð  sE   € ð 9‰9˜Ò%Ø×/Ñ/°Ó7ˆFà×!Ñ! &Ó)ˆØ×#Ñ# FÓ+ˆØˆr&   rB   r2   s   @r%   rÞ   rÞ   Û  s'   ø„ ñð	]Ð1õ 	]ð˜eŸl™l÷ r&   rÞ   c                   ó6   ‡ — e Zd ZdZdefˆ fd„Zddefd„Zˆ xZS )ÚPatchTSMixerBlockz‹The main computing framework of the `PatchTSMixer` model.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óº   •— t         ‰|   «        |j                  }t        j                  t        |«      D cg c]  }t        |¬«      ‘Œ c}«      | _        y c c}w ©Nrà   )r   r   Ú
num_layersr   Ú
ModuleListÚrangerÞ   Úmixers)r#   r5   rë   rÕ   r$   s       €r%   r   zPatchTSMixerBlock.__init__	  sC   ø€ Ü‰ÑÔà×&Ñ&ˆ
ä—m‘mÌuÐU_ÓO`Ö$aÈ!Ô%6¸fÖ%EÒ$aÓbˆùÒ$as   ¸AÚoutput_hidden_statesc                 óx   — g }|}| j                   D ]  } ||«      }|sŒ|j                  |«       Œ  |r||fS |dfS )as  
        Args:
            hidden_state (`torch.Tensor`): The input tensor.
            output_hidden_states (`bool`, *optional*, defaults to False.):
                Whether to output the hidden states as well.

        Returns:
            `torch.Tensor`: The embedding. `list`: List of all hidden states if `output_hidden_states` is set to
            `True`.
        N)rî   Úappend)r#   re   rï   Úall_hidden_statesÚ	embeddingÚmods         r%   r+   zPatchTSMixerBlock.forward  sW   € ð Ðà ˆ	à—;‘;ò 	4ˆCÙ˜I›ˆIÚ#Ø!×(Ñ(¨Õ3ð	4ñ
  ØÐ/Ð/Ð/à˜d?Ð"r&   ©F)	r,   r-   r.   r/   r   r   rÇ   r+   r1   r2   s   @r%   rè   rè     s#   ø„ ñðcÐ1õ cñ#¸$÷ #r&   rè   c                   ó0   ‡ — e Zd ZdZddefˆ fd„Zd„ Zˆ xZS )ÚPatchTSMixerForPredictionHeadzqPrediction Head for Forecasting

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óæ  •— t         ‰|   «        |j                  | _        | j                  | j                  j                  «        t	        j
                  |j                  «      | _        |€=t	        j                  |j                  |j                  z  |j                  «      | _        n-|j                  |j                  |j                  z  «      | _        t	        j                  d¬«      | _        y )Néþÿÿÿ©Ú	start_dim)r   r   Úprediction_channel_indicesÚsortr   rx   Úhead_dropoutÚdropout_layerr   rM   r:   Úprediction_lengthÚbase_forecast_blockÚget_parameter_projectionÚFlattenÚflatten)r#   r5   Údistribution_outputr$   s      €r%   r   z&PatchTSMixerForPredictionHead.__init__2  s·   ø€ Ü‰ÑÔà*0×*KÑ*KˆÔ'à×*Ñ*Ð6Ø×+Ñ+×0Ñ0Ô2äŸZ™Z¨×(;Ñ(;Ó<ˆÔØÐ&Ü')§y¡y°&×2DÑ2DÀvÇ~Á~Ñ2UÐX^×XpÑXpÓ'qˆDÕ$à':×'SÑ'SØ×"Ñ" V§^¡^Ñ3ó(ˆDÔ$ô —z‘z¨BÔ/ˆr&   c                 ó^  ‡ — ‰ j                  |«      }‰ j                  |«      }‰ j                  |«      }t        |t        «      rt	        d„ |D «       «      }n|j                  dd«      }‰ j                  7t        |t        «      rt	        ˆ fd„|D «       «      }|S |d‰ j                  f   }|S )ar  

        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size, num_patch, d_model)` in `flatten` mode
                or `(batch_size, n_vars, num_patch, d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size, prediction_length, nvars)`.

        c              3   ó@   K  — | ]  }|j                  d d«      –— Œ y­w)r   rù   N)r@   )Ú.0Úzs     r%   ú	<genexpr>z8PatchTSMixerForPredictionHead.forward.<locals>.<genexpr>U  s   è ø€ ÒC°Q˜QŸ[™[¨¨R×0ÑCùs   ‚r   rù   c              3   ó@   •K  — | ]  }|d ‰j                   f   –— Œ y­w).N)rü   )r  r	  r#   s     €r%   r
  z8PatchTSMixerForPredictionHead.forward.<locals>.<genexpr>[  s!   øè ø€ Ò [ÈQ  3¨×(GÑ(GÐ#GÕ!HÑ [ùs   ƒ.)r  rÿ   r  Ú
isinstancerÈ   r@   rü   ©r#   Úhidden_featuresÚforecasts   `  r%   r+   z%PatchTSMixerForPredictionHead.forwardD  s©   ø€ ð Ÿ,™, Ó7ˆØ×,Ñ,¨_Ó=ˆØ×+Ñ+¨OÓ<ˆÜh¤Ô&ÜÑC¸(ÔCÓC‰Hà×)Ñ)¨"¨bÓ1ˆHà×*Ñ*Ð6Ü˜(¤EÔ*Ü Ó [ÐRZÔ [Ó[ð ˆð $ C¨×)HÑ)HÐ$HÑIàˆr&   r(   rÖ   r2   s   @r%   r÷   r÷   *  s   ø„ ññ0Ð1õ 0ö$r&   r÷   c                   ó0   ‡ — e Zd ZdZddefˆ fd„Zd„ Zˆ xZS )ÚPatchTSMixerLinearHeadz€Linear head for Classification and Regression.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 ó\  •— t         ‰|   «        |j                  | _        |j                  | _        |j                  €|j                  }nd}|| _        |€@t        j                  |j                  |j                  z  |z  |j                  «      | _        n0|j                  |j                  |j                  z  |z  «      | _        |j                  €t        j                  d¬«      | _        nt        j                  d¬«      | _        t        j                  |j                   «      | _        y )Nr   éýÿÿÿrú   rù   )r   r   Úhead_aggregationÚoutput_rangerM   r  r   r   r:   rŠ   Únum_targetsÚ
projectionr  r  r  rx   rþ   ry   )r#   r5   r  Ú
mul_factorr$   s       €r%   r   zPatchTSMixerLinearHead.__init__j  só   ø€ Ü‰ÑÔà &× 7Ñ 7ˆÔØ"×/Ñ/ˆÔà×"Ñ"Ð*Ø×+Ñ+‰JàˆJØ#6ˆÔ ØÐ&Ü Ÿi™iØ—‘ ×!:Ñ!:Ñ:¸ZÑGØ×"Ñ"óˆDOð
 2×JÑJØ—‘ ×!:Ñ!:Ñ:¸ZÑGóˆDŒOð ×"Ñ"Ð*ÜŸ:™:°Ô3ˆDLäŸ:™:°Ô3ˆDŒLä—z‘z &×"5Ñ"5Ó6ˆr&   c                 ó&  — |j                  dd«      }| j                  dk(  r|d   }nM| j                  dk(  r|j                  d¬«      j                  }n!| j                  dk(  r|j	                  d¬«      }| j
                  r| j                  |«      }| j                  |«      }| j                  |«      }| j                  €Q| j                  Et        j                  |«      | j                  d   | j                  d	   z
  z  | j                  d	   z   }|S )
ai  
        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size x num_patch x d_model)` in `flatten` mode
                or `(batch_size x n_vars x num_patch x d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size x num_targets)`.
        r   rù   Úuse_last).r   Úmax_poolr   Úavg_poolr   r   )r@   r  ÚmaxÚvaluesr^   r  ry   r  r  r  rC   Úsigmoid)r#   r  s     r%   r+   zPatchTSMixerLinearHead.forward†  s
  € ð *×3Ñ3°B¸Ó;ˆØ× Ñ  JÒ.à-¨gÑ6‰OØ×"Ñ" jÒ0à-×1Ñ1°bÐ1Ó9×@Ñ@‰OØ×"Ñ" jÒ0à-×2Ñ2°rÐ2Ó:ˆOà<Š<Ø"Ÿl™l¨?Ó;ˆOØŸ,™, Ó7ˆØŸ/™/¨/Ó:ˆà×$Ñ$Ð,°4×3DÑ3DÐ3Pä—‘˜oÓ.°$×2CÑ2CÀAÑ2FÈ×IZÑIZÐ[\ÑI]Ñ2]Ñ^Ðae×arÑarÐstÑauÑuð ð Ðr&   r(   rÖ   r2   s   @r%   r  r  b  s   ø„ ññ7Ð1õ 7ö8 r&   r  c                   ó*   — e Zd ZU eed<   dZdZdZd„ Zy)ÚPatchTSMixerPreTrainedModelr5   ÚmodelÚpast_valuesFc                 ó„  — t        |t        «      rG| j                  j                  dk(  r-t        j
                  j                  |j                  dd¬«       yyt        |t        j                  t        j                  f«      rJ|j                  j                  j                  «        |j                  j                  j                  d«       yt        |t        «      r^|j                   j                  j                  j                  «        |j                   j                  j                  j                  d«       yt        |t        j"                  «      rm|j                  j                  j                  d| j                  j$                  ¬«       |j                  %|j                  j                  j                  «        yyy)zInitialize weightsrP   r¹   gš™™™™™¹?)r^   r_   ç      ð?N)r  rF   r5   rU   r   ÚinitÚnormal_rJ   rn   r9   rª   ÚdataÚzero_ÚweightÚfill_r4   r<   r   Úinit_std)r#   r‘   s     r%   Ú_init_weightsz)PatchTSMixerPreTrainedModel._init_weights±  s.  € äfÔ<Ô=à{‰{×3Ñ3°xÒ?Ü—‘—‘ × 3Ñ 3¸#À3ÕGð @ä˜¤§¡¬r¯~©~Ð >Ô?ØK‰K×Ñ×"Ñ"Ô$ØM‰M×Ñ×$Ñ$ SÕ)Ü˜Ô 5Ô6Ø×Ñ×!Ñ!×&Ñ&×,Ñ,Ô.Ø×Ñ×#Ñ#×(Ñ(×.Ñ.¨sÕ3Ü˜¤§	¡	Ô*ØM‰M×Ñ×&Ñ&¨C°T·[±[×5IÑ5IÐ&ÔJØ{‰{Ð&Ø—‘× Ñ ×&Ñ&Õ(ð 'ð +r&   N)	r,   r-   r.   r   Ú__annotations__Úbase_model_prefixÚmain_input_nameÚsupports_gradient_checkpointingr-  © r&   r%   r!  r!  ©  s    … ð ÓØÐØ#€OØ&+Ð#ó)r&   r!  c                   ó.   ‡ — e Zd ZdZdefˆ fd„Zd„ Zˆ xZS )ÚPatchTSMixerPretrainHeadzcPretraining head.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óÈ   •— t         ‰|   «        t        j                  |j                  «      | _        t        j                  |j                  |j                  «      | _	        y r(   )
r   r   r   rx   rþ   rÿ   r   r:   Úpatch_lengthÚbase_pt_blockr=   s     €r%   r   z!PatchTSMixerPretrainHead.__init__Ë  sB   ø€ Ü‰ÑÔäŸZ™Z¨×(;Ñ(;Ó<ˆÔÜŸY™Y v§~¡~°v×7JÑ7JÓKˆÕr&   c                 óJ   — | j                  |«      }| j                  |«      }|S )a  
        Args:
            hidden_features (`torch.Tensor` of shape `(batch_size x num_patch x d_model)` in `flatten` mode
                or `(batch_size x n_vars x num_patch x d_model)` in `common_channel`/`mix_channel` mode.): Input hidden
                features.

        Returns:
            `torch.Tensor` of shape `(batch_size x n_vars x num_patch x patch_length)`.
        )rÿ   r7  r  s      r%   r+   z PatchTSMixerPretrainHead.forwardÑ  s)   € ð ×,Ñ,¨_Ó=ˆØ×%Ñ% oÓ6ˆØˆr&   rÖ   r2   s   @r%   r4  r4  Ã  s   ø„ ñðLÐ1õ Lör&   r4  r)   Ú
mask_ratioÚunmasked_channel_indicesÚchannel_consistent_maskingÚ
mask_valuec                 ó°  — |dk  s|dk\  rt        d|› d«      ‚| j                  \  }}}}| j                  }	t        |d|z
  z  «      }
|r-t	        j
                  |d||	¬«      }|j                  d|d«      }nt	        j
                  ||||	¬«      }t	        j                  ||||	¬«      }d|dd…dd…d|
…f<   t	        j                  |d¬«      }t	        j                  |d¬«      }t	        j                  |d|¬	«      }|j                  d«      j                  ddd|«      }|d|dd…|dd…dd…f<   | j                  |j                  «       |«      }||d
   fS )aÆ  random_masking: Mask the input considering the control variables.

    Args:
        inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
            The input tensor to mask.
        mask_ratio (`float`):
            Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
        unmasked_channel_indices (list, *optional*):
            Indices of channels that will not be masked.
        channel_consistent_masking (bool, *optional*, defaults to `False`):
            When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
            across channels.
        mask_value (int, *optional*, defaults to 0):
            Define the value of masked patches for pretraining.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
        n]
    r   r   zMask ratio z has to be between 0 and 1.©ÚdeviceNr   r   )r   Úindex©.r   )r`   rq   r?  r0   rC   ÚrandÚrepeatÚonesÚargsortÚgatherrX   Úmasked_fillrÇ   )r)   r9  r:  r;  r<  rÑ   Únum_channelsÚsequence_lengthÚnum_featuresr?  Úlen_keepÚnoiseÚmaskÚids_shuffleÚids_restoreÚinputs_masks                   r%   Úrandom_maskingrQ  â  sQ  € ð4 A‚~˜ qšÜ˜; z lÐ2MÐNÓOÐOà>D¿l¹lÑ;€J˜o¨|Ø]‰]€Fä? a¨*¡nÑ5Ó6€Há!Ü—
‘
˜: q¨/À&ÔIˆØ—‘˜Q ¨aÓ0‰ô —
‘
˜: |°_ÈVÔTˆô :‰:j ,°ÈÔO€DØ€DŠŠAˆyˆyˆÑô —-‘- ¨2Ô.€KÜ—-‘- °Ô4€Kä<‰<˜ "¨KÔ8€DØ>‰>˜"Ó×$Ñ$ Q¨¨1¨lÓ;€DØÐ+Ø23ˆŠQÐ(ª!ªQÐ.Ñ/à×$Ñ$ T§Y¡Y£[°*Ó=€KØ˜˜V™Ð$Ð$r&   Únum_forecast_mask_patchesc                 óP  — t        |t        «      r|g}|D cg c]  }d‘Œ }}| j                  \  }}}}	t        j                  |||| j
                  ¬«      }
g }d}t        |«      }t        ||«      D ]H  \  }}|dk  s||k\  rt        d|› d«      ‚t        ||z  |z  «      }|j                  |||g«       ||z  }ŒJ t        |d„ ¬«      }||k  r|d   d   ||z
  z   |d   d<   n||kD  r|d	   d   ||z
  z   |d	   d<   d}|D ]  \  }}}||z   }d|
||…d
d
…| d
…f<   |}Œ t        j                  |
j                  d   «      }|
|   }
|
j                  d	«      j                  ddd|	«      }
|d|
d
d
…|d
d
…d
d
…f<   | j                  |
j                  «       |«      }||
d   fS c c}w )a¡  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
    If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

    Parameters:
        inputs (`torch.Tensor`):
            Input of shape `(bs, num_channels, num_patch, patch_length)`
        num_forecast_mask_patches (`list`):
            Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
        unmasked_channel_indices (`list`, *optional*):
            Indices of channels that are not masked.
        mask_value (`int`, *optional*, defaults to 0):
            Values in the masked patches will be filled by `mask_value`.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
        num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
    r   r>  r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                 ó   — | d   S ©Nr?   r2  )Úxs    r%   ú<lambda>z"forecast_masking.<locals>.<lambda>I  s
   € ¨!¨A©$€ r&   )r“   r?   r   NrA  )r  r0   rq   rC   rL   r?  ÚsumÚzipr`   rñ   ÚsortedÚrandpermrX   rC  rG  rÇ   )r)   rR  r:  r<  rÕ   Úforecast_mask_ratiosrÑ   rH  rI  rJ  rM  Út_listÚtotal_lengthÚtotal_ratior6  ÚratioÚtemp_lenÚbatch1Ú	patch_lenÚbatch2ÚpermrP  s                         r%   Úforecast_maskingrf    s  € ô0 Ð+¬SÔ1Ø%>Ð$?Ð!Ø'@ÖA !šAÐAÐÐAà>D¿l¹lÑ;€J˜o¨|Ü;‰;z <°ÈÏÉÔW€Dà€FØ€LÜÐ*Ó+€Kä"Ð#<Ð>RÓSò !ÑˆeØ˜1Ò °Ò ?ÜØ,¨\¨NÐ:pÐqóð ô z EÑ)¨KÑ7Ó8ˆØ‰| U¨HÐ5Ô6Ø˜Ñ ‰ð!ô F¡Ô/€FàjÒ Ø˜a‘y ‘| z°LÑ'@ÑAˆˆq‰	!ŠØ	˜
Ò	"Ø˜r™
 1™¨¸
Ñ)BÑCˆˆr‰
1‰à€FØ"(ò Ñˆ	1hØ˜(Ñ"ˆØ./ˆˆVFˆ]šA 	˜z™{Ð*Ñ+Ø‰ðô
 >‰>˜$Ÿ*™* Q™-Ó(€DØ‰:€Dà>‰>˜"Ó×$Ñ$ Q¨¨1¨lÓ;€DØÐ+Ø23ˆŠQÐ(ª!ªQÐ.Ñ/à×$Ñ$ T§Y¡Y£[°*Ó=€KØ˜˜V™Ð$Ð$ùòO Bs   ˜	F#c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚPatchTSMixerPatchifyz³
    A class to patchify the time series sequence into different patches

    Returns:
        `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
    r5   c                 ó  •— t         ‰|   «        |j                  | _        |j                  | _        |j
                  | _        | j                  | j                  k  r&t        d| j                  › d| j                  › d«      ‚t        | j                  | j                  «      | j                  z
  | j
                  z  dz   | _        | j                  | j
                  | j                  dz
  z  z   }| j                  |z
  | _	        y )NzSequence length (z+) has to be greater than the patch length (ú)r   )
r   r   Úcontext_lengthrI  r6  Úpatch_strider`   r  rM   Úsequence_start)r#   r5   Únew_sequence_lengthr$   s      €r%   r   zPatchTSMixerPatchify.__init__j  sò   ø€ Ü‰ÑÔà%×4Ñ4ˆÔØ"×/Ñ/ˆÔØ"×/Ñ/ˆÔà×Ñ 4×#4Ñ#4Ò4ÜØ# D×$8Ñ$8Ð#9Ð9dÐei×evÑevÐdwÐwxÐyóð ô
   × 4Ñ 4°d×6GÑ6GÓHÈ4×K\ÑK\Ñ\Ðae×arÑarÑrÐuvÑvˆÔØ"×/Ñ/°$×2CÑ2CÀt×GWÑGWÐZ[ÑG[Ñ2\Ñ\ÐØ"×2Ñ2Ð5HÑHˆÕr&   r#  c                 ó:  — |j                   d   }|| j                  k7  rt        d|› d| j                  › d«      ‚|dd…| j                  d…dd…f   }|j	                  d| j
                  | j                  ¬«      }|j                  dd«      j                  «       }|S )a!  
        Parameters:
            past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
                Input for patchification

        Returns:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
        rù   zInput sequence length (z%) doesn't match model configuration (r­   N)Ú	dimensionrœ   Ústepr  )	rq   rI  r`   rm  Úunfoldr6  rl  r@   r    )r#   r#  rI  rA   s       r%   r+   zPatchTSMixerPatchify.forward{  s¨   € ð &×+Ñ+¨BÑ/ˆØ˜d×2Ñ2Ò2ÜØ)¨/Ð):Ð:_Ð`d×`tÑ`tÐ_uÐuwÐxóð ð šQ × 3Ñ 3Ñ 5²qÐ8Ñ9ˆà—‘¨°$×2CÑ2CÈ$×J[ÑJ[Ó\ˆà×!Ñ! " bÓ)×4Ñ4Ó6ˆØˆr&   rB   r2   s   @r%   rh  rh  b  s'   ø„ ñðIÐ1õ Ið" 5§<¡<÷ r&   rh  c                   óH   ‡ — e Zd ZdZdefˆ fd„Zdej                  fd„Zˆ xZ	S )ÚPatchTSMixerMaskinga”  
    Class to perform random or forecast masking.

    Parameters:
        config (`PatchTSMixerConfig`): model config
    Returns:
        x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
            Masked patched input
        mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
            Bool tensor indicating True on masked points
    r5   c                 ó<  •— t         ‰|   «        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        | j                  t        | j                  «      | _        y y r(   )	r   r   Úrandom_mask_ratior;  Ú	mask_typerR  r:  r<  rZ  r=   s     €r%   r   zPatchTSMixerMasking.__init__   s„   ø€ Ü‰ÑÔØ!'×!9Ñ!9ˆÔØ*0×*KÑ*KˆÔ'Ø×)Ñ)ˆŒØ)/×)IÑ)IˆÔ&Ø(.×(GÑ(GˆÔ%Ø ×+Ñ+ˆŒØ×(Ñ(Ð4Ü,2°4×3PÑ3PÓ,QˆDÕ)ð 5r&   rc   c                 ór  — | j                   dk(  r<t        || j                  | j                  | j                  | j
                  ¬«      \  }}nY| j                   dk(  r1t        || j                  | j                  | j
                  ¬«      \  }}nt        d| j                   › d«      ‚|j                  «       }||fS )aä  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input

        Return:
            masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
                Masked patched input
            mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
                Bool tensor indicating True on masked points

        rP   )r)   r9  r:  r;  r<  r  )r)   rR  r:  r<  zInvalid mask type ú.)
rw  rQ  rv  r:  r;  r<  rf  rR  r`   rÇ   )r#   rc   Úmasked_inputrM  s       r%   r+   zPatchTSMixerMasking.forward«  s±   € ð >‰>˜XÒ%Ü!/Ø"Ø×1Ñ1Ø)-×)FÑ)FØ+/×+JÑ+JØŸ?™?ô"ÑˆL™$ð ^‰^˜zÒ)Ü!1Ø"Ø*.×*HÑ*HØ)-×)FÑ)FØŸ?™?ô	"ÑˆL™$ô Ð1°$·.±.Ð1AÀÐCÓDÐDð y‰y‹{ˆØ˜TÐ!Ð!r&   rB   r2   s   @r%   rt  rt  “  s'   ø„ ñ
ð	RÐ1õ 	Rð!" 5§<¡<÷ !"r&   rt  c            	       ó¬   ‡ — e Zd ZdZdefˆ fd„Zdej                  dej                  deej                  ej                  ej                  f   fd„Z	ˆ xZ
S )ÚPatchTSMixerStdScalerz½
    Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
    subtracting from the mean and dividing by the standard deviation.
    r5   c                 óè   •— t         ‰|   «        t        |d«      r|j                  nd| _        t        |d«      r|j
                  nd| _        t        |d«      r|j                  | _        y d| _        y )NÚscaling_dimr   ÚkeepdimTÚminimum_scalegñhãˆµøä>)r   r   Úhasattrr~  r   r  r€  r=   s     €r%   r   zPatchTSMixerStdScaler.__init__Ö  s[   ø€ Ü‰ÑÔÜ)0°¸Ô)G6×%Ò%ÈQˆŒÜ)0°¸Ô)Cv—~’~ÈˆŒÜ5<¸VÀ_Ô5U˜V×1Ñ1ˆÕÐ[_ˆÕr&   r(  Úobserved_indicatorrN   c                 óŒ  — |j                  | j                  | j                  ¬«      }|j                  d«      }||z  j                  | j                  | j                  ¬«      |z  }||z
  |z  dz  j                  | j                  | j                  ¬«      |z  }t	        j
                  || j                  z   «      }||z
  |z  ||fS )áC  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        ©r  r%  r?   )rX  r   r  Ú	clamp_minrC   Úsqrtr€  )r#   r(  r‚  ÚdenominatorÚlocÚvarianceÚscales          r%   r+   zPatchTSMixerStdScaler.forwardÜ  s¾   € ð )×,Ñ,¨T¯X©X¸t¿|¹|Ð,ÓLˆØ!×+Ñ+¨CÓ0ˆØÐ(Ñ(×-Ñ-¨d¯h©hÀÇÁÐ-ÓMÐP[Ñ[ˆà˜S‘jÐ$6Ñ6¸1Ñ<×AÑAÀ$Ç(Á(ÐTX×T`ÑT`ÐAÓaÐdoÑoˆÜ—
‘
˜8 d×&8Ñ&8Ñ8Ó9ˆØs‘
˜eÑ# S¨%Ð/Ð/r&   ©r,   r-   r.   r/   r   r   rC   rD   rÈ   r+   r1   r2   s   @r%   r|  r|  Ð  sT   ø„ ñð
`Ð1õ `ð0Ø—L‘Lð0Ø6;·l±lð0à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷0r&   r|  c            	       ó¬   ‡ — e Zd ZdZdefˆ fd„Zdej                  dej                  deej                  ej                  ej                  f   fd„Z	ˆ xZ
S )ÚPatchTSMixerMeanScalerzŠ
    Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
    accordingly.
    r5   c                 ó&  •— t         ‰|   «        t        |d«      r|j                  nd| _        t        |d«      r|j
                  nd| _        t        |d«      r|j                  nd| _        t        |d«      r|j                  | _        y d | _        y )Nr~  r   r  Tr€  ç»½×Ùß|Û=Údefault_scale)r   r   r  r~  r   r  r€  r‘  r=   s     €r%   r   zPatchTSMixerMeanScaler.__init__ú  su   ø€ Ü‰ÑÔÜ)0°¸Ô)G6×%Ò%ÈQˆŒÜ)0°¸Ô)Cv—~’~ÈˆŒÜ5<¸VÀ_Ô5U˜V×1Ò1Ð[`ˆÔÜ5<¸VÀ_Ô5U˜V×1Ñ1ˆÕÐ[_ˆÕr&   r(  r‚  rN   c                 óÊ  — ||z  j                  «       j                  | j                  d¬«      }|j                  | j                  d¬«      }|t        j                  |d¬«      z  }| j
                  €Q|j                  d¬«      }t        j                  |j                  d«      d¬«      }t        j                  ||z  «      }n"| j
                  t        j                  |«      z  }t        j                  |dkD  ||«      }t        j                  || j                  ¬«      }||z  }	| j                  s|j                  | j                  ¬«      }|	t        j                  |«      |fS )r„  Tr…  r   ©Úminr   r   )ÚabsrX  r   rC   Úclampr‘  ÚsqueezeÚ	ones_likeÚwherer€  r  Ú
zeros_like)
r#   r(  r‚  Úts_sumÚnum_observedr‹  Ú	batch_sumÚbatch_observationsr‘  Úscaled_datas
             r%   r+   zPatchTSMixerMeanScaler.forward  s.  € ð Ð+Ñ+×0Ñ0Ó2×6Ñ6°t·x±xÈÐ6ÓNˆØ)×-Ñ-¨d¯h©hÀÐ-ÓEˆàœŸ™ \°qÔ9Ñ9ˆð ×ÑÐ%ØŸ
™
 q˜
Ó)ˆIÜ!&§¡¨\×-=Ñ-=¸aÓ-@ÀaÔ!HÐÜ!ŸM™M¨)Ð6HÑ*HÓI‰Mà ×.Ñ.´·±ÀÓ1GÑGˆMô —‘˜L¨1Ñ,¨e°]ÓCˆô —‘˜E t×'9Ñ'9Ô:ˆØ˜U‘lˆà|Š|Ø—M‘M d§h¡hMÓ/ˆEàœE×,Ñ,¨UÓ3°UÐ:Ð:r&   rŒ  r2   s   @r%   rŽ  rŽ  ô  sT   ø„ ñð
`Ð1õ `ð&;Ø—L‘Lð&;Ø6;·l±lð&;à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷&;r&   rŽ  c            
       ó¶   ‡ — e Zd ZdZdefˆ fd„Z	 ddej                  deej                     de	ej                  ej                  ej                  f   fd„Z
ˆ xZS )	ÚPatchTSMixerNOPScalerz|
    Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
    r5   c                 óª   •— t         ‰|   «        t        |d«      r|j                  nd| _        t        |d«      r|j
                  | _        y d| _        y )Nr~  r   r  T)r   r   r  r~  r   r  r=   s     €r%   r   zPatchTSMixerNOPScaler.__init__0  s@   ø€ Ü‰ÑÔÜ)0°¸Ô)G6×%Ò%ÈQˆŒÜ)0°¸Ô)Cv—~‘~ˆÈˆr&   r(  r‚  rN   c                 óü   — t        j                  |d¬«      j                  | j                  | j                  ¬«      }t        j
                  |d¬«      j                  | j                  | j                  ¬«      }|||fS )a  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        FrQ   )r   r  )rC   r˜  r^   r   r  rš  )r#   r(  r‚  r‹  r‰  s        r%   r+   zPatchTSMixerNOPScaler.forward5  si   € ô —‘ °EÔ:×?Ñ?ÀDÇHÁHÐVZ×VbÑVbÐ?ÓcˆÜ×Ñ˜t°5Ô9×>Ñ>À4Ç8Á8ÐUY×UaÑUaÐ>ÓbˆØS˜%ÐÐr&   r(   )r,   r-   r.   r/   r   r   rC   rD   r   rÈ   r+   r1   r2   s   @r%   r¡  r¡  +  s`   ø„ ñðNÐ1õ Nð PTñ Ø—L‘Lð Ø6>¸u¿|¹|Ñ6Lð à	ˆu|‰|˜UŸ\™\¨5¯<©<Ð7Ñ	8÷ r&   r¡  zS
    Base class for `PatchTSMixerEncoderOutput`, with potential hidden states.
    )Úcustom_introc                   óh   — e Zd ZU dZdZeej                     ed<   dZ	ee
ej                        ed<   y)ÚPatchTSMixerEncoderOutputa-  
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, d_model)`):
        Hidden-state at the output of the last layer of the model.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer.
    NÚlast_hidden_stater³   )r,   r-   r.   r/   r§  r   rC   ÚFloatTensorr.  r³   rÈ   r2  r&   r%   r¦  r¦  F  s9   … ñð 6:Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ô<r&   r¦  c                   óz   ‡ — e Zd ZdZdefˆ fd„Ze	 	 d	dej                  de	e
   de	e
   deeef   fd„«       Zˆ xZS )
ÚPatchTSMixerEncoderz°
    Encoder for PatchTSMixer which inputs patched time-series and outputs patched embeddings.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.
    r5   c                 óJ  •— t         ‰|   |«       |j                  | _        t        j                  |j
                  |j                  «      | _        |j                  rt        |¬«      | _
        nd | _
        t        |¬«      | _        |j                  r| j                  «        y y rê   )r   r   Úuse_return_dictr   r   r6  r:   ÚpatcherrH   rF   Úpositional_encoderrè   Úmlp_mixer_encoderÚ	post_initr=   s     €r%   r   zPatchTSMixerEncoder.__init__a  s   ø€ Ü‰Ñ˜Ô à%×5Ñ5ˆÔä—y‘y ×!4Ñ!4°f·n±nÓEˆŒØ×)Ò)Ü&DÈFÔ&SˆDÕ#à&*ˆDÔ#Ü!2¸&Ô!AˆÔð ×ÒØN‰NÕð r&   r#  rï   Úreturn_dictrN   c                 óð   — ||n| j                   }| j                  |«      }| j                  | j                  |«      }| j                  ||¬«      \  }}|st	        d„ ||fD «       «      S t        ||¬«      S )aÑ  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to
            predict the masked portion. For a forecasting task, this denotes the history/past time series values.
            Similarly, for classification or regression tasks, it denotes the appropriate context values of the
            time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series,
            it is greater than 1.

        Returns:
            `torch.FloatTensor` of shape `(batch_size, n_vars, num_patches, d_model)`
        )rï   c              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  ©r  Úvs     r%   r
  z.PatchTSMixerEncoder.forward.<locals>.<genexpr>’  s   è ø€ ò àô ñùó   ‚)r§  r³   )r¬  r­  r®  r¯  rÈ   r¦  )r#   r#  rï   r±  Úpatchesr§  r³   s          r%   r+   zPatchTSMixerEncoder.forwardq  s˜   € ð* &1Ð%<‘kÀ$×BVÑBVˆð —,‘,˜{Ó+ˆð ×"Ñ"Ð.Ø×-Ñ-¨gÓ6ˆGà+/×+AÑ+AÀ'Ð`tÐ+AÓ+uÑ(Ð˜=áÜñ ð &Ø!ðôó ð ô )Ð;LÐ\iÔjÐjr&   )FN)r,   r-   r.   r/   r   r   r   rC   rD   r   rÇ   r   rÈ   r¦  r+   r1   r2   s   @r%   rª  rª  X  st   ø„ ñðÐ1õ ð  ð 05Ø&*ñ	(kà—\‘\ð(kð ' t™nð(kð ˜d‘^ð	(kð
 
ˆuÐ/Ð/Ñ	0ò(kó ô(kr&   rª  zG
    Base class for model's outputs, with potential hidden states.
    c                   ó  — e Zd ZU dZdZeej                     ed<   dZ	ee
ej                        ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   dZeej                     ed<   y)	ÚPatchTSMixerModelOutputa  
    last_hidden_state (`torch.FloatTensor`  of shape `(batch_size, num_channels, num_patches, d_model)`):
        Hidden-state at the output of the last layer of the model.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer.
    patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
        Patched input data to the model.
    mask (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*):
        Bool Tensor indicating True in masked patches and False otherwise.
    loc (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
        Gives the mean of the context window per channel. Used for revin denorm outside the model, if revin
        enabled.
    scale (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
        Gives the std dev of the context window per channel. Used for revin denorm outside the model, if revin
        enabled.
    Nr§  r³   rc   rM  r‰  r‹  )r,   r-   r.   r/   r§  r   rC   r¨  r.  r³   rÈ   rc   rM  r‰  r‹  r2  r&   r%   r¹  r¹    s   … ñð" 6:Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø/3€K˜%×+Ñ+Ñ,Ó3Ø(,€Dˆ(5×$Ñ$Ñ
%Ó,Ø'+€Cˆ%×#Ñ#Ñ	$Ó+Ø)-€Eˆ8E×%Ñ%Ñ&Ô-r&   r¹  z=
    The PatchTSMixer Model for time-series forecasting.
    c                   ó’   ‡ — e Zd Zd
dedefˆ fd„Ze	 	 	 ddej                  de	ej                     de	e   de	e   de
f
d	„«       Zˆ xZS )ÚPatchTSMixerModelr5   Ú
mask_inputc                 ó´  •— t         ‰|   |«       |j                  | _        t        |«      | _        t        |«      | _        |du rt        |«      | _        nd| _        |j                  dk(  rt        |«      | _        n>|j                  dk(  s|j                  du rt        |«      | _        nt        |«      | _        |j                  r| j                  «        yy)z•
        mask_input (bool, *optional*, defaults to `False`):
            Whether to mask the input using the [`PatchTSMixerMasking`] module.
        TNr^   r_   )r   r   r¬  rª  Úencoderrh  Úpatchingrt  Úmaskingr–   rŽ  Úscalerr|  r¡  r°  )r#   r5   r¼  r$   s      €r%   r   zPatchTSMixerModel.__init__Ã  s´   ø€ ô
 	‰Ñ˜Ô à%×5Ñ5ˆÔÜ*¨6Ó2ˆŒÜ,¨VÓ4ˆŒà˜ÑÜ.¨vÓ6ˆDLàˆDŒLà>‰>˜VÒ#Ü0°Ó8ˆDKØ^‰^˜uÒ$¨¯©¸$Ñ(>Ü/°Ó7ˆDKä/°Ó7ˆDŒKð ×ÒØN‰NÕð r&   r#  Úobserved_maskrï   r±  rN   c           	      óä  — ||n| j                   }d}|€t        j                  |«      }| j                  ||«      \  }}}| j	                  |«      }	|	}
| j
                  | j                  |	«      \  }
}| j                  |
||¬«      }t        |t        «      rt        |Ž }|s,t        d„ |j                  |j                  |	|||fD «       «      S t        |j                  |j                  |	|||¬«      S )aë  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
        N©rï   r±  c              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  r´  s     r%   r
  z,PatchTSMixerModel.forward.<locals>.<genexpr>  ó   è ø€ ò 
àô ñ
ùr¶  )r§  r³   rc   rM  r‰  r‹  )r¬  rC   r˜  rÁ  r¿  rÀ  r¾  r  rÈ   r¦  r§  r³   r¹  )r#   r#  rÂ  rï   r±  rM  Úscaled_past_valuesr‰  r‹  Ú	patched_xÚ	enc_inputÚencoder_outputs               r%   r+   zPatchTSMixerModel.forwardÞ  s  € ð, &1Ð%<‘kÀ$×BVÑBVˆàˆØÐ Ü!ŸO™O¨KÓ8ˆMØ)-¯©°[À-Ó)PÑ&Ð˜C à—M‘MÐ"4Ó5ˆ	àˆ	Ø<‰<Ð#Ø"Ÿl™l¨9Ó5‰OˆItð Ÿ™ØØ!5Ø#ð &ó 
ˆô n¤eÔ,Ü6¸ÐGˆNáÜñ 
ð #×4Ñ4Ø"×0Ñ0ØØØØðô
ó 
ð 
ô 'Ø,×>Ñ>Ø(×6Ñ6Ø!ØØØô
ð 	
r&   rõ   )NFN)r,   r-   r.   r   rÇ   r   r   rC   rD   r   r¹  r+   r1   r2   s   @r%   r»  r»  ½  s‚   ø„ ñÐ1ð ¸tõ ð6 ð 15Ø/4Ø&*ñA
à—\‘\ðA
ð   §¡Ñ-ðA
ð ' t™nð	A
ð
 ˜d‘^ðA
ð 
!òA
ó ôA
r&   r»  z>
    Output type of [`PatchTSMixerForPreTrainingOutput`].
    c                   ó¸   — e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   y)Ú PatchTSMixerForPreTrainingOutputa@  
    loss (*optional*, returned when `y` is provided, `torch.FloatTensor` of shape `()`):
        Total loss
    prediction_outputs (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, patch_length)`):
        Prediction output from the pretrain head.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
        Backbone embeddings before passing through the head.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer.
    NÚlossÚprediction_outputsr§  r³   ©r,   r-   r.   r/   rÍ  r   rC   r¨  r.  rÎ  r§  r³   rÈ   r2  r&   r%   rÌ  rÌ  #  ód   … ñ	ð )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø6:Ð˜ ×!2Ñ!2Ñ3Ó:Ø59Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ô<r&   rÌ  z.
    `PatchTSMixer` for mask pretraining.
    c                   ó’   ‡ — e Zd Zdefˆ fd„Ze	 	 	 	 d
dej                  deej                     dee	   de	dee	   de
fd	„«       Zˆ xZS )ÚPatchTSMixerForPretrainingr5   c                 óè   •— t         ‰|   |«       t        |d¬«      | _        t	        |¬«      | _        |j                  | _        |j                  | _        |j                  r| j                  «        y y )NT)r¼  rà   )	r   r   r»  r"  r4  ÚheadÚmasked_lossr¬  r°  r=   s     €r%   r   z#PatchTSMixerForPretraining.__init__A  s`   ø€ Ü‰Ñ˜Ô Ü& v¸$Ô?ˆŒ
Ü,°FÔ;ˆŒ	Ø!×-Ñ-ˆÔØ%×5Ñ5ˆÔð ×ÒØN‰NÕð r&   r#  rÂ  rï   Úreturn_lossr±  rN   c                 óØ  — ||n| j                   }| j                  du r!t        j                  j	                  d¬«      }n t        j                  j	                  d¬«      }| j                  ||||¬«      }t        |t        «      rt        |Ž }| j                  |j                  «      }|du r |||j                  «      }	nd}	| j                  du rM|	K|	j                  d¬«      |j                  z  j                  «       |j                  j                  «       d	z   z  }	|s*t        d
„ |	||j                  |j                  fD «       «      S t!        |	||j                  |j                  ¬«      S )aT  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
        return_loss (`bool`,  *optional*):
            Whether to return the loss in the `forward` call.
        NTÚnone©Ú	reductionr^   ©rÂ  rï   r±  r   r   r  c              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  r´  s     r%   r
  z5PatchTSMixerForPretraining.forward.<locals>.<genexpr>‚  ó   è ø€ ò àô ñùr¶  ©rÍ  rÎ  r§  r³   )r¬  rÕ  rC   r   ÚMSELossr"  r  rÈ   r¹  rÔ  r§  rc   r^   rM  rX  r³   rÌ  )
r#   r#  rÂ  rï   rÖ  r±  rÍ  Úmodel_outputÚx_hatÚloss_vals
             r%   r+   z"PatchTSMixerForPretraining.forwardL  so  € ð2 &1Ð%<‘kÀ$×BVÑBVˆà×Ñ˜tÑ#Ü—8‘8×#Ñ#¨fÐ#Ó5‰Dä—8‘8×#Ñ#¨fÐ#Ó5ˆDð —z‘zØØ'Ø!5Ø#ð	 "ó 
ˆô l¤EÔ*Ü2°LÐAˆLà—	‘	˜,×8Ñ8Ó9ˆà˜$ÑÙ˜E <×#;Ñ#;Ó<‰HàˆHð ×Ñ˜tÑ#¨Ð(<Ø Ÿ™¨"˜Ó-°×0AÑ0AÑA×FÑFÓHÈL×L]ÑL]×LaÑLaÓLcÐfkÑLkÑlˆHáÜñ ð ØØ ×2Ñ2Ø ×.Ñ.ð	ôó ð ô 0ØØ$Ø*×<Ñ<Ø&×4Ñ4ô	
ð 	
r&   ©NFTN)r,   r-   r.   r   r   r   rC   rD   r   rÇ   rÌ  r+   r1   r2   s   @r%   rÒ  rÒ  ;  s‰   ø„ ð	Ð1õ 	ð ð 15Ø/4Ø Ø&*ñD
à—\‘\ðD
ð   §¡Ñ-ðD
ð ' t™nð	D
ð
 ðD
ð ˜d‘^ðD
ð 
*òD
ó ôD
r&   rÒ  z=
    Output type of [`PatchTSMixerForPredictionOutput`].
    c                   ó  — e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   dZeej                     ed<   dZeej                     ed<   y)	ÚPatchTSMixerForPredictionOutputaD  
    loss (*optional*, returned when `y` is provided, `torch.FloatTensor` of shape `()`):
        Total loss.
    prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, num_input_channels)`):
        Prediction output from the forecast head.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
        Backbone embeddings before passing through the head.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    loc (`torch.FloatTensor`, *optional* of shape `(batch_size, 1, num_input_channels)`):
        Input mean
    scale (`torch.FloatTensor`, *optional* of shape `(batch_size, 1, num_input_channels)`):
        Input std dev
    NrÍ  rÎ  r§  r³   r‰  r‹  )r,   r-   r.   r/   rÍ  r   rC   r¨  r.  rÎ  r§  r³   rÈ   r‰  r‹  r2  r&   r%   rå  rå  ”  sŽ   … ñð )-€Dˆ(5×$Ñ$Ñ
%Ó,Ø6:Ð˜ ×!2Ñ!2Ñ3Ó:Ø59Ðx × 1Ñ 1Ñ2Ó9Ø8<€M8˜E %×"3Ñ"3Ñ4Ñ5Ó<Ø'+€Cˆ%×#Ñ#Ñ	$Ó+Ø)-€Eˆ8E×%Ñ%Ñ&Ô-r&   rå  zƒ
    Base class for time series model's predictions outputs that contains the sampled values from the chosen
    distribution.
    c                   ó:   — e Zd ZU dZdZeej                     ed<   y)Ú"SamplePatchTSMixerPredictionOutputú¨
    sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, prediction_length, number_channels)`):
        Sampled values from the chosen distribution.
    NÚ	sequences©	r,   r-   r.   r/   ré  r   rC   r¨  r.  r2  r&   r%   rç  rç  ²  ó   … ñð
 .2€Iˆx˜×)Ñ)Ñ*Ô1r&   rç  c                   ó:   — e Zd ZU dZdZeej                     ed<   y)Ú"SamplePatchTSMixerRegressionOutputrè  Nré  rê  r2  r&   r%   rí  rí  Â  rë  r&   rí  ÚinputÚtargetrN   c                 ó&   — | j                  |«       S )zc
    Computes the negative log likelihood loss from input distribution with respect to target.
    )Úlog_prob)rî  rï  s     r%   Únllrò  Ó  s   € ð N‰N˜6Ó"Ð"Ð"r&   Úinput_tensorÚweightsc                 óP  — |“t        j                  |dk7  | |z  t        j                  | «      «      }t        j                  |r|j	                  |¬«      n|j	                  «       d¬«      }|r|j	                  |¬«      |z  S |j	                  «       |z  S | j                  |¬«      S )aj  
    Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
    meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

    Args:
        input_tensor (`torch.FloatTensor`):
            Input tensor, of which the average must be computed.
        weights (`torch.FloatTensor`, *optional*):
            Weights tensor, of the same shape as `input_tensor`.
        dim (`int`, *optional*):
            The dim along which to average `input_tensor`.

    Returns:
        `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
    r   r   r%  r“  )rC   r™  rš  r–  rX  r^   )ró  rô  r   Úweighted_tensorÚsum_weightss        r%   Úweighted_averagerø  Û  s›   € ð  ÐÜŸ+™+ g°¡l°LÀ7Ñ4JÌE×L\ÑL\Ð]iÓLjÓkˆÜ—k‘k¹# '§+¡+°# +Ô"6À7Ç;Á;Ã=ÐVYÔZˆÙ03×#Ñ#¨Ð#Ó,ÐR]Ñ]Ð]¸×9LÑ9LÓ9NÐR]Ñ]Ð]à× Ñ  SÐ Ó)Ð)r&   c                   ó$  ‡ — e Zd ZdZdefˆ fd„Ze	 	 	 	 	 ddej                  de	ej                     de	ej                     de	e
   de
d	e	e
   d
efd„«       Z ej                  «       	 ddej                  de	ej                     d
efd„«       Zˆ xZS )ÚPatchTSMixerForPredictionz 
    `PatchTSMixer` for forecasting application.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
    r5   c                 ó2  •— t         ‰|   |«       |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  dk(  rd | _        nc|j                  }t        t        t        dœ}|j                  |j                  d «      }| ||¬«      | _        nt        d|j                  › «      ‚t        |«      | _        t        || j                  ¬«      | _        |j"                  r| j#                  «        y y )NÚmse©Ú	student_tÚnormalÚnegative_binomialr   úUnknown distribution output ©r5   r  )r   r   rÍ  r¬  rü   Únum_parallel_samplesr  r   r   r   r   Úgetr`   r»  r"  r÷   rÔ  r°  )r#   r5   r   Údistribution_output_mapÚoutput_classr$   s        €r%   r   z"PatchTSMixerForPrediction.__init__ÿ  sû   ø€ Ü‰Ñ˜Ô Ø—K‘KˆŒ	Ø%×5Ñ5ˆÔØ*0×*KÑ*KˆÔ'Ø$*×$?Ñ$?ˆÔ!à;‰;˜%ÒØ'+ˆDÕ$à×*Ñ*ˆCä+Ü&Ü%;ñ'Ð#ð
 3×6Ñ6°v×7QÑ7QÐSWÓXˆLØÐ'Ù+7¸CÔ+@Õ(ä Ð#?À×@ZÑ@ZÐ?[Ð!\Ó]Ð]ä& vÓ.ˆŒ
Ü1ØØ $× 8Ñ 8ô
ˆŒ	ð ×ÒØN‰NÕð r&   r#  rÂ  Úfuture_valuesrï   rÖ  r±  rN   c           	      ó‚  — | j                   dk(  rt        j                  d¬«      }n!| j                   dk(  rt        }nt	        d«      ‚||n| j
                  }| j                  ||||¬«      }t        |t        «      rt        |Ž }| j                  |j                  «      }	d}
| j                  á| j                  r|| j                  j                  |	|j                  d| j                  f   |j                   d| j                  f   ¬	«      }||d
u r |||d| j                  f   «      }
t#        |
«      }
nÝ|	|j                   d| j                  f   z  |j                  d| j                  f   z   }	|¡|d
u r ||	|d| j                  f   «      }
n„| j                  rM| j                  j                  |	|j                  |j                   ¬	«      }|D|d
u r@ |||«      }
t#        |
«      }
n+|	|j                   z  |j                  z   }	||d
u r	 ||	|«      }
| j                  7|j                  d| j                  f   }|j                   d| j                  f   }n|j                  }|j                   }|s,t        d„ |
|	|j                  |j$                  ||fD «       «      S t'        |
|	|j                  |j$                  ||¬«      S )aÖ  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.
        observed_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:
            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
        future_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,:
            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*):
            Target values of the time series, that serve as labels for the model. The `future_values` is what the
            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
            required for a pretraining task.

            For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
            pass the target data with all channels, as channel Filtering for both prediction and target will be
            manually applied before the loss computation.
        return_loss (`bool`,  *optional*):
            Whether to return the loss in the `forward` call.
        rü  r^   rÙ  rò  ú2Invalid loss function: Allowed values: mse and nllNrÛ  .©r‰  r‹  Tc              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  r´  s     r%   r
  z4PatchTSMixerForPrediction.forward.<locals>.<genexpr>„  rÆ  r¶  )rÍ  rÎ  r§  r³   r‰  r‹  )rÍ  r   rß  rò  r`   r¬  r"  r  rÈ   r¹  rÔ  r§  rü   r  Údistributionr‰  r‹  rø  r³   rå  )r#   r#  rÂ  r  rï   rÖ  r±  rÍ  rà  Úy_hatrâ  r  r‰  r‹  s                 r%   r+   z!PatchTSMixerForPrediction.forward  s  € ðH 9‰9˜ÒÜ—:‘:¨Ô/‰DØY‰Y˜%ÒÜ‰DäÐQÓRÐRà%0Ð%<‘kÀ$×BVÑBVˆð —z‘zØØ'Ø!5Ø#ð	 "ó 
ˆô l¤EÔ*Ü2°LÐAˆLð —	‘	˜,×8Ñ8Ó9ˆàˆØ×*Ñ*Ð6Ø×'Ò'Ø#×7Ñ7×DÑDØØ$×(Ñ(¨¨d×.MÑ.MÐ)MÑNØ&×,Ñ,¨S°$×2QÑ2QÐ-QÑRð  Eó  ð
 !Ñ,°ÀÒ1DÙ#Ø$Ø% c¨4×+JÑ+JÐ&JÑKó Hô
  0°Ó9‘Hð ˜L×.Ñ.¨s°D×4SÑ4SÐ/SÑTÑTØ"×&Ñ& s¨D×,KÑ,KÐ'KÑLñMð ð !Ð,°ÀÑ1DÙ# E¨=¸¸d×>]Ñ>]Ð9]Ñ+^Ó_‘Hà×'Ò'Ø#×7Ñ7×DÑDØ˜|×/Ñ/°|×7IÑ7Ið  Eó  ð !Ð,°ÀÑ1DÙ# L°-Ó@HÜ/°Ó9‘Hà × 2Ñ 2Ñ2°\×5EÑ5EÑEØ Ð,°ÀÑ1DÙ# E¨=Ó9Hà×*Ñ*Ð6Ø×"Ñ" 3¨×(GÑ(GÐ#GÑHˆCØ ×&Ñ& s¨D×,KÑ,KÐ'KÑL‰Eà×"Ñ"ˆCØ ×&Ñ&ˆEáÜñ 
ð ØØ ×2Ñ2Ø ×.Ñ.ØØðô
ó 
ð 
ô /ØØ$Ø*×<Ñ<Ø&×4Ñ4ØØô
ð 	
r&   c                 óF  — | j                   } | |d|d¬«      }| j                  j                  |j                  |j                  |j
                  ¬«      }t        |«      D cg c]  }|j                  «       ‘Œ }}t        j                  |d¬«      }t        |¬«      S c c}w )aÀ  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Args:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the future.

            observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSMixerPredictionOutput`] where the outputs `sequences` tensor will have shape `(batch_size,
            number of samples, prediction_length, num_input_channels)`.
        NF)r#  r  rÂ  rï   r
  r   r   ©ré  )r  r  r  rÎ  r‰  r‹  rí   ÚsamplerC   Ústackrç  )r#   r#  rÂ  r  Úoutputsr  rÕ   Úsampless           r%   Úgeneratez"PatchTSMixerForPrediction.generate™  s¡   € ð2  $×8Ñ8Ðñ Ø#ØØ'Ø!&ô	
ˆð ×/Ñ/×<Ñ<Ø×&Ñ&¨G¯K©K¸w¿}¹}ð =ó 
ˆô
 38Ð8LÓ2MÖN¨Q<×&Ñ&Õ(ÐNˆÐNô —+‘+˜g¨1Ô-ˆÜ1¸GÔDÐDùò	 Os   Á"B)NNFTNr(   )r,   r-   r.   r/   r   r   r   rC   rD   r   rÇ   rå  r+   Úno_gradrç  r  r1   r2   s   @r%   rú  rú  ó  sö   ø„ ñ	ðÐ1õ ð@ ð 15Ø04Ø/4Ø Ø&*ñw
à—\‘\ðw
ð   §¡Ñ-ðw
ð   §¡Ñ-ð	w
ð
 ' t™nðw
ð ðw
ð ˜d‘^ðw
ð 
)òw
ó ðw
ðr €U‡]]ƒ_ð 15ñ-Eà—\‘\ð-Eð   §¡Ñ-ð-Eð 
,ò	-Eó ô-Er&   rú  zK
    Output type of [`PatchTSMixerForTimeSeriesClassificationOutput`].
    c                   ó¸   — e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   y)Ú-PatchTSMixerForTimeSeriesClassificationOutputaP  
    loss (*optional*, returned when `y` is provided, `torch.FloatTensor` of shape `()`):
        Total loss.
    prediction_outputs (`torch.FloatTensor` of shape `(batch_size, num_labels)`):
        Prediction output from the classification head.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
        Backbone embeddings before passing through the head.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    NrÍ  rÎ  r§  r³   rÏ  r2  r&   r%   r  r  Ê  rÐ  r&   r  c                   ó–   ‡ — e Zd ZdZdefˆ fd„Ze	 	 	 	 ddej                  de	ej                     de	e
   de
de	e
   d	efd
„«       Zˆ xZS )Ú'PatchTSMixerForTimeSeriesClassificationz£
    `PatchTSMixer` for classification application.

    Args:
        config (`PatchTSMixerConfig`):
            Configuration.

    Returns:
        `None`.
    r5   c                 ó:  •— t         ‰|   |«       t        |«      | _        t	        |¬«      | _        |j                  | _        |j                  dv r't        |j                  |j                  ¬«      | _        nd | _        |j                  r| j                  «        y y )Nrà   ©r_   r^   T©r:   rM   )r   r   r»  r"  r  rÔ  r¬  r–   ÚInjectScalerStatistics4Dr:   rM   Úinject_scaler°  r=   s     €r%   r   z0PatchTSMixerForTimeSeriesClassification.__init__î  s„   ø€ Ü‰Ñ˜Ô ä& vÓ.ˆŒ
Ü*Øô
ˆŒ	ð  &×5Ñ5ˆÔØ>‰>Ð2Ñ2Ü 8ÀÇÁÐ]c×]oÑ]oÔ pˆDÕà $ˆDÔð ×ÒØN‰NÕð r&   r#  Útarget_valuesrï   rÖ  r±  rN   c                 ó4  — t         j                  j                  «       }||n| j                  }| j	                  |||¬«      }t        |t        «      rt        |Ž }| j                  7| j                  |j                  |j                  |j                  ¬«      |_	        | j                  |j                  «      }||du r
 |||«      }	nd}	|s*t        d„ |	||j                  |j                  fD «       «      S t        |	||j                  |j                  ¬«      S )að  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.
        target_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*):
            Target
            values of the time series, that serve as labels for the model. The `target_values` is what the
            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
            required for a pretraining task.

            For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
            pass the target data with all channels, as channel Filtering for both prediction and target will be
            manually applied before the loss computation.

            For a classification task, it has a shape of `(batch_size,)`.

            For a regression task, it has a shape of `(batch_size, num_targets)`.
        return_loss (`bool`, *optional*):
            Whether to return the loss in the `forward` call.
        NrÄ  r
  Tc              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  r´  s     r%   r
  zBPatchTSMixerForTimeSeriesClassification.forward.<locals>.<genexpr>>  rÝ  r¶  rÞ  )rC   r   ÚCrossEntropyLossr¬  r"  r  rÈ   r¹  r  r§  r‰  r‹  rÔ  r³   r  )
r#   r#  r  rï   rÖ  r±  rÍ  rà  r  râ  s
             r%   r+   z/PatchTSMixerForTimeSeriesClassification.forwardÿ  s.  € ôH x‰x×(Ñ(Ó*ˆà%0Ð%<‘kÀ$×BVÑBVˆà—z‘zØØ!5Ø#ð "ó 
ˆô
 l¤EÔ*Ü2°LÐAˆLà×ÑÐ(Ø-1×->Ñ->Ø×.Ñ.Ø ×$Ñ$Ø"×(Ñ(ð .?ó .ˆLÔ*ð —	‘	˜,×8Ñ8Ó9ˆàÐ$¨¸Ñ)<Ù˜E =Ó1‰HàˆHáÜñ ð ØØ ×2Ñ2Ø ×.Ñ.ð	ôó ð ô =ØØ$Ø*×<Ñ<Ø&×4Ñ4ô	
ð 	
r&   rã  )r,   r-   r.   r/   r   r   r   rC   rD   r   rÇ   r  r+   r1   r2   s   @r%   r  r  â  sŽ   ø„ ñ	ðÐ1õ ð" ð 15Ø/4Ø Ø&*ñM
à—\‘\ðM
ð   §¡Ñ-ðM
ð ' t™nð	M
ð
 ðM
ð ˜d‘^ðM
ð 
7òM
ó ôM
r&   r  z=
    Output type of [`PatchTSMixerForRegressionOutput`].
    c                   ó¸   — e Zd ZU dZdZeej                     ed<   dZ	eej                     ed<   dZ
eej                     ed<   dZeeej                        ed<   y)ÚPatchTSMixerForRegressionOutputaM  
    loss (*optional*, returned when `y` is provided, `torch.FloatTensor` of shape `()`):
        Total loss.
    regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
        Prediction output from the regression head.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_input_channels, num_patches, d_model)`):
        Backbone embeddings before passing through the head.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*):
        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    NrÍ  Úregression_outputsr§  r³   )r,   r-   r.   r/   rÍ  r   rC   r¨  r.  r%  r§  r³   rÈ   r2  r&   r%   r$  r$  P  rÐ  r&   r$  c                   ó~   ‡ — e Zd Zd	dededefˆ fd„Zdej                  dej                  dej                  fd„Zˆ xZS )
r  r:   rM   Ú	expansionc                 ó&  •— t         ‰|   «        t        j                  |dz   ||z  «      | _        t        j                  ||z  |«      | _        t        j                  dd|z  «      | _        t        j                  d|z  d«      | _        || _        y rU  )	r   r   r   r   Úinverse_trans_expansionÚinverse_trans_compressionÚmap_scale_expansionÚmap_scale_compressionrM   )r#   r:   rM   r'  r$   s       €r%   r   z!InjectScalerStatistics4D.__init__i  sx   ø€ Ü‰ÑÔä')§y¡y°¸1±¸iÈ'Ñ>QÓ'RˆÔ$Ü)+¯©°9¸wÑ3FÈÓ)PˆÔ&Ü#%§9¡9¨Q°°I±Ó#>ˆÔ Ü%'§Y¡Y¨q°9©}¸aÓ%@ˆÔ"Ø&ˆÕr&   r)   r‰  r‹  c                 óö  — |j                  dd«      }|j                  d«      }|j                  dd| j                  d«      }|j                  dd«      }|j                  d«      }|j                  dd| j                  d«      }t	        j
                  ||gd¬«      }| j                  |«      }| j                  |«      }t	        j
                  ||gd¬«      }| j                  |«      }| j                  |«      }|S )a‰  
        Args:
            inputs (`torch.Tensor` of shape `(batch_size, num_input_channels, num_patch, d_model)`)
            loc (`torch.Tensor` of shape `(batch_size, 1, num_input_channels)`)
            scale (`torch.Tensor` of shape `(batch_size, 1, num_input_channels)`)
        Returns:
            `torch.Tensor` of shape `(batch_size, num_input_channels, num_patch, d_model)`
        r   rù   r   r   )
r@   rX   rC  rM   rC   Úcatr+  r,  r)  r*  )r#   r)   r‰  r‹  r^   ÚstdevÚconcat_statss          r%   r+   z InjectScalerStatistics4D.forwardr  sâ   € ð }‰}˜R Ó$ˆØ~‰~˜bÓ!ˆØ{‰{˜1˜a ×!1Ñ!1°1Ó5ˆà—‘  BÓ'ˆØ—‘ Ó#ˆØ—‘˜Q  4×#3Ñ#3°QÓ7ˆä—y‘y $¨ °BÔ7ˆà×/Ñ/°Ó=ˆØ×1Ñ1°,Ó?ˆä—‘˜F LÐ1°rÔ:ˆØ×-Ñ-¨fÓ5ˆØ×/Ñ/°Ó7ˆàˆr&   )r?   )	r,   r-   r.   r0   r   rC   rD   r+   r1   r2   s   @r%   r  r  h  sC   ø„ ñ' ð '°#ð 'À#õ 'ð˜eŸl™lð °·±ð ÀeÇlÁl÷ r&   r  z4
    `PatchTSMixer` for regression application.
    c                   óÞ   ‡ — e Zd Zdefˆ fd„Ze	 	 	 	 ddej                  deej                     dee	   de	dee	   de
fd	„«       Z ej                  «       dej                  defd
„«       Zˆ xZS )ÚPatchTSMixerForRegressionr5   c                 ó¤  •— t         ‰|   |«       t        |«      | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  dk(  rd | _        n`t        t        t        dœ}|j                  |j
                  «      }| ||j                  ¬«      | _        nt        d|j
                  › «      ‚|j                  dv r't        |j                   |j"                  ¬«      | _        nd | _        t'        || j
                  ¬«      | _        |j*                  r| j+                  «        y y )Nrü  rý  r   r  r  r  r  )r   r   r»  r"  rÍ  r  r¬  r  r   r   r   r  r  r`   r–   r  r:   rM   r  r  rÔ  r°  )r#   r5   r  r  r$   s       €r%   r   z"PatchTSMixerForRegression.__init__–  s$  ø€ Ü‰Ñ˜Ô ä& vÓ.ˆŒ
à—K‘KˆŒ	Ø#)×#=Ñ#=ˆÔ à%×5Ñ5ˆÔØ$*×$?Ñ$?ˆÔ!à;‰;˜%ÒØ'+ˆDÕ$ô ,Ü&Ü%;ñ'Ð#ð
 3×6Ñ6°v×7QÑ7QÓRˆLØÐ'Ù+7¸F×<NÑ<NÔ+OÕ(ä Ð#?À×@ZÑ@ZÐ?[Ð!\Ó]Ð]à>‰>Ð2Ñ2Ü 8ÀÇÁÐ]c×]oÑ]oÔ pˆDÕà $ˆDÔä*ØØ $× 8Ñ 8ô
ˆŒ	ð ×ÒØN‰NÕð r&   r#  r  rï   rÖ  r±  rN   c           	      óæ  — | j                   dk(  rt        j                  d¬«      }n!| j                   dk(  rt        }nt	        d«      ‚||n| j
                  }| j                  |||¬«      }t        |t        «      rt        |Ž }| j                  7| j                  |j                  |j                  |j                  ¬«      |_        | j                  |j                  «      }|¹|d	u rµ| j                  rŸ| j                  d
k(  r#t!        j"                  |dk  «      rt%        d«      ‚| j                  j'                  |«      }	t        |D 
cg c](  }
|
j)                  d| j*                  j,                  «      ‘Œ* c}
«      } ||	|«      }t/        |«      }n |||«      }nd}|s*t        d„ |||j                  |j0                  fD «       «      S t3        |||j                  |j0                  ¬«      S c c}
w )aä  
        past_values (`torch.FloatTensor` of shape `(batch_size, seq_length, num_input_channels)`):
            Context values of the time series. For a pretraining task, this denotes the input time series to predict
            the masked portion. For a forecasting task, this denotes the history/past time series values. Similarly,
            for classification or regression tasks, it denotes the appropriate context values of the time series.

            For univariate time series, `num_input_channels` dimension should be 1. For multivariate time series, it is
            greater than 1.
        target_values (`torch.FloatTensor` of shape `(batch_size, target_len, num_input_channels)` for forecasting,
            `(batch_size, num_targets)` for regression, or `(batch_size,)` for classification, *optional*):
            Target values of the time series, that serve as labels for the model. The `target_values` is what the
            Transformer needs during training to learn to output, given the `past_values`. Note that, this is NOT
            required for a pretraining task.

            For a forecasting task, the shape is be `(batch_size, target_len, num_input_channels)`. Even if we want
            to forecast only specific channels by setting the indices in `prediction_channel_indices` parameter,
            pass the target data with all channels, as channel Filtering for both prediction and target will be
            manually applied before the loss computation.

            For a classification task, it has a shape of `(batch_size,)`.

            For a regression task, it has a shape of `(batch_size, num_targets)`.
        return_loss (`bool`, *optional*):
            Whether to return the loss in the `forward` call.
        rü  r^   rÙ  rò  r	  NrÄ  r
  Tr   r   zDtarget_values cannot be negative for negative_binomial distribution.r   c              3   ó    K  — | ]  }|–— Œ y ­wr(   r2  r´  s     r%   r
  z4PatchTSMixerForRegression.forward.<locals>.<genexpr>	  rÝ  r¶  )rÍ  r%  r§  r³   )rÍ  r   rß  rò  r`   r¬  r"  r  rÈ   r¹  r  r§  r‰  r‹  rÔ  r  rC   ÚanyÚ	Exceptionr  rŸ   r5   r  rø  r³   r$  )r#   r#  r  rï   rÖ  r±  rÍ  rà  r  r  Úitemrâ  s               r%   r+   z!PatchTSMixerForRegression.forward½  sæ  € ðF 9‰9˜ÒÜ—:‘:¨Ô/‰DØY‰Y˜%ÒÜ‰DäÐQÓRÐRà%0Ð%<‘kÀ$×BVÑBVˆØ—z‘zØØ!5Ø#ð "ó 
ˆô
 l¤EÔ*Ü2°LÐAˆLà×ÑÐ(Ø-1×->Ñ->Ø×.Ñ.Ø ×$Ñ$Ø"×(Ñ(ð .?ó .ˆLÔ*ð —	‘	˜,×8Ñ8Ó9ˆàÐ$¨¸Ñ)<Ø×'Ò'Ø×+Ñ+Ð/BÒBÄuÇyÁyÐQ^ÐabÑQbÔGcÜ#Ð$jÓkÐkØ#×7Ñ7×DÑDÀUÓKäÐRWÖXÈ$˜tŸy™y¨¨T¯[©[×-DÑ-DÕEÒXÓYÙ ¨mÓ<ä+¨HÓ5‘á  }Ó5‘àˆHáÜñ ð ØØ ×2Ñ2Ø ×.Ñ.ð	ôó ð ô /ØØ$Ø*×<Ñ<Ø&×4Ñ4ô	
ð 	
ùò) Ys   Å
-G.c                 ó`  — | j                   } | |dd¬«      }| j                  j                  |j                  «      }t	        |«      D cg c]  }|j                  «       ‘Œ }}t        j                  |d¬«      j                  d|| j                  j                  «      }t        |¬«      S c c}w )a
  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Args:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the target values.

        Return:
            [`SamplePatchTSMixerRegressionOutput`] where the outputs `sequences` tensor will have shape `(batch_size,
            number of samples, num_targets)`.
        NF)r#  r  rï   r   r   r   r  )r  r  r  r%  rí   r  rC   r  rŸ   r5   r  rí  )r#   r#  r  r  r  rÕ   r  s          r%   r  z"PatchTSMixerForRegression.generate  sª   € ð"  $×8Ñ8Ðñ Ø#ØØ!&ô
ˆð ×/Ñ/×<Ñ<¸W×=WÑ=WÓXˆô ,1Ð1EÓ+Fö
Ø&'ˆL×ÑÕ!ð
ˆð 
ô
 —+‘+˜g¨1Ô-×2Ñ2°2Ð7KÈTÏ[É[×MdÑMdÓeˆÜ1¸GÔDÐDùò
s   Á
B+rã  )r,   r-   r.   r   r   r   rC   rD   r   rÇ   r$  r+   r  rí  r  r1   r2   s   @r%   r2  r2    sÁ   ø„ ð%Ð1õ %ðN ð 15Ø/4Ø Ø&*ñZ
à—\‘\ðZ
ð   §¡Ñ-ðZ
ð ' t™nð	Z
ð
 ðZ
ð ˜d‘^ðZ
ð 
)òZ
ó ðZ
ðx €U‡]]ƒ_ð#Eà—\‘\ð#Eð 
,ò#Eó ô#Er&   r2  )r!  r»  rÒ  rú  r  r2  )Nr¹   N)NFr   )Nr   )NN)Rr/   rZ   Údataclassesr   Útypingr   r   r   rC   Útorch.nnr   Útransformers.modeling_utilsr   Útransformers.utilsr   Úmodeling_flash_attention_utilsr
   Úmodeling_utilsr   Úprocessing_utilsr   Útime_series_utilsr   r   r   Úutilsr   r   Úconfiguration_patchtsmixerr   Ú
get_loggerr,   ÚloggerÚModuler   r4   rF   rh   rt   r„   rD   rÆ   r¤   r¦   rÊ   rØ   rÞ   rè   r÷   r  r!  r4  ÚlistrÇ   r0   rQ  rf  rh  rt  r|  rŽ  r¡  r¦  rª  r¹  r»  rÌ  rÒ  rå  rç  rí  ÚdistributionsÚDistributionrò  rø  rú  r  r  r$  r  r2  Ú__all__r2  r&   r%   ú<module>rL     sM  ðñ "ã Ý !ß ,Ñ ,ã Ý å 7Ý *å BÝ 5Ý &ß UÑ Uß ,Ý :ð 
ˆ×	Ñ	˜HÓ	%€ô §¡ô ô*&˜BŸI™Iô &ô,$ R§Y¡Yô $ôN.˜BŸI™Iô .ôbb—i‘iô ô.-¨2¯9©9ô -ðn  $ØØ(,ñ%ØI‰Ið%à<‰<ð%ð 
‰ð%ð <‰<ð	%ð
 ˜UŸ\™\Ñ*ð%ð e‰_ð%ð ð%ð ˜Ÿ™Ñ%ó%ô>U/˜BŸI™Iô U/ôpCb—i‘iô CôL*˜Ÿ	™	ô *ôZ#˜Ÿ	™	ô #ôL&#˜Ÿ	™	ô &#ôR5 B§I¡Iô 5ôpD˜RŸY™Yô DðN ô) /ó )ó ð)ô2˜rŸy™yô ðD 04Ø',Øñ7%ØL‰Lð7%àð7%ð ' t™nð7%ð !%ð	7%ð
 ó7%ð| 04Øñ	A%ØL‰LðA%à$ T¨3 YÑ/ðA%ð ' t™nðA%ð ó	A%ôJ-˜2Ÿ9™9ô -ôb9"˜"Ÿ)™)ô 9"ôz 0˜BŸI™Iô  0ôH3;˜RŸY™Yô 3;ôn ˜BŸI™Iô  ð6 Ùðôô
	= ó 	=óó ð	=ôBkÐ5ô BkðJ Ùðôô
.˜kó .óó ð.ñ4 ðôô
^
Ð3ó ^
óð
^
ðB Ùðôô
= {ó =óó ð=ñ$ ðôô
Q
Ð!<ó Q
óð
Q
ðh Ùðôô
. kó .óó ð.ð0 Ùðôô2¨ó 2óó ð2ð Ùðôô2¨ó 2óó ð2ð#ˆu×"Ñ"×/Ñ/ð #¸¿¹ð #È%Ï,É,ó #ñ* 5§<¡<ð *¸(À5Ç<Á<Ñ:Pð *Ðfk×frÑfró *ô0TEÐ ;ô TEðn Ùðôô
=°Kó =óó ð=ô$k
Ð.Iô k
ð\ Ùðôô
= kó =óó ð=ô$%˜rŸy™yô %ñP ðôô
iEÐ ;ó iEóð
iEòXr&   