
    rhy                   \   d Z ddlmZ ddlZddlZddlZddlm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZ dd	lm Z  d
dl!m"Z"  e jF                  e$      Z%dZ&dZ'd Z(d Z)d Z*d Z+ G d dejX                  jZ                        Z. G d dejX                  jZ                        Z/ G d dejX                  jZ                        Z0 G d dejX                  jZ                        Z1 G d dejX                  jZ                        Z2 G d dejX                  jZ                        Z3 G d dejX                  jZ                        Z4 G d  d!ejX                  jZ                        Z5 G d" d#ejX                  jZ                        Z6 G d$ d%ejX                  jZ                        Z7 G d& d'ejX                  jZ                        Z8 G d( d)e      Z9d*Z:d+Z; e
d,e:       G d- d.ejX                  jZ                               Z< e
d,e:       G d/ d0e9             Z= e
d1e:       G d2 d3e9e             Z> G d4 d5ejX                  jZ                        Z? e
d6e:       G d7 d8e9e             Z@ e
d9e:       G d: d;e9e             ZA G d< d=ejX                  jZ                        ZBd@d>ZCg d?ZDy)AzPyTorch ESM model.    )annotationsN   )add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward)+TFBaseModelOutputWithPastAndCrossAttentions.TFBaseModelOutputWithPoolingAndCrossAttentionsTFMaskedLMOutputTFSequenceClassifierOutputTFTokenClassifierOutput)	TFMaskedLanguageModelingLossTFModelInputTypeTFPreTrainedModelTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeras
shape_listunpack_inputs)check_embeddings_within_boundsstable_softmax)logging   )	EsmConfigzfacebook/esm2_t6_8M_UR50Dr   c                l    t        j                  | dd      \  }}t        j                  | |fd      S )N   axis)tfsplitconcat)xx1x2s      z/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/esm/modeling_tf_esm.pyrotate_halfr'   6   s/    XXa$FB99rc2YR((    c                    |d d d d d t        j                  |       d   d d f   }|d d d d d t        j                  |       d   d d f   }| |z  t        |       |z  z   S )N)r    shaper'   )r#   cossins      r&   apply_rotary_pos_embr.   ;   sd    
a%bhhqk"o%q(
)C
a%bhhqk"o%q(
)CGA,--r(   c                F    | t         j                  j                  |       z   S )zJMake layer symmetric in final two dimensions, used for contact prediction.)r    linalgmatrix_transpose)r#   s    r&   
symmetrizer2   B   s    ryy))!,,,r(   c                    t        j                  | dd      }t        j                  | dd      }t        j                  | dd      }||z  }||z  }| |z
  }|S )z=Perform average product correct, used for contact prediction.r   T)keepdimsr*   )r   r*   )r    
reduce_sum)r#   a1a2a12avg
normalizeds         r&   average_product_correctr;   G   sY    	q"t	,B	q"t	,B
--8d
3C
r'C
)CSJr(   c                  @     e Zd ZdZdd fdZ fdZddZd	dZ xZS )
TFRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    c                4    t         |   |       || _        y )Nname)super__init__dim)selfrC   r@   	__class__s      r&   rB   zTFRotaryEmbedding.__init__Z   s    d# r(   c           
     ^   t         |   |       | j                  d| j                  dz  ft        j
                  t        d      d      | _        | j                  j                  ddt	        j                  d| j                  dt        j
                        | j                  z  z  z         y )	Ninv_freqr         ?F)r+   dtypeinitializer	trainablei'  r   )startlimitdeltarI   )
rA   build
add_weightrC   r    float32r   rG   assignrange)rD   input_shaperE   s     r&   rO   zTFRotaryEmbedding.buildd   s    k"txx1}.bjjo^aNbns ( 
 	5RXXATXXQbjjY\`\d\ddef	
r(   c                f   t        j                  |      |   }t        j                  || j                  j                        }t        j
                  d|| j                        }t        j                  ||fd      d d d d d d f   }t        j                  |      t        j                  |      fS )NrI   z
i, j -> ijr   r   )	r    r+   rS   rG   rI   einsumr"   r,   r-   )rD   r#   seq_dimensionseq_lentfreqsembs          r&   _compute_cos_sinz"TFRotaryEmbedding._compute_cos_sinm   s    ((1+m,HHWDMM$7$78		,4==9iiR0tQ1ABvvc{BFF3K''r(   c                b    | j                  |d      \  }}t        |||      t        |||      fS )Nr*   )rX   )r]   r.   )rD   qkcos_embsin_embs        r&   callzTFRotaryEmbedding.callv   s@    00"0E !GW5 GW5
 	
r(   N)rC   int)r   )r_   	tf.Tensorr`   rf   returnztuple[tf.Tensor, tf.Tensor])	__name__
__module____qualname____doc__rB   rO   r]   rc   __classcell__rE   s   @r&   r=   r=   S   s    
(
r(   r=   c                  @     e Zd ZdZ	 	 	 d	 	 	 d fdZddZd Z xZS )TFEsmContactPredictionHeadzWPerforms symmetrization, apc, and computes a logistic regression on the output featuresc                    t         |   |       || _        || _        t        j
                  j                  d|dd      | _        y )Nr?   r   sigmoid
regression)use_bias
activationr@   )rA   rB   eos_idxin_featuresr   layersDenserr   )rD   rv   biasru   r@   rE   s        r&   rB   z#TFEsmContactPredictionHead.__init__   sD     	d#&,,,,Q)Zf,gr(   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d | j                  f       d d d        y y # 1 sw Y   y xY w)NTrr   )builtgetattrr    
name_scoperr   r@   rO   rv   rD   rT   s     r&   rO   z TFEsmContactPredictionHead.build   sy    ::
4t,8t334 @%%tT-=-=&>?@ @ 9@ @s   (A==Bc                   t        j                  || j                  k7  |j                        }t        j                  |d      t        j                  |d      z  }||d d d d d d d d f   z  }|dd dd df   }|ddd dd f   }t        |      \  }}}}}t        j                  ||||z  ||f      }t        t        |            }t        j                  |d      }t        j                  | j                  |      d      S )Nr   r   .r   )r   r   r   r   permr   )r    castru   rI   expand_dimsr   reshaper;   r2   	transposesqueezerr   )	rD   tokens
attentionseos_mask
batch_sizerw   headsseqlen_s	            r&   rc   zTFEsmContactPredictionHead.call   s    776T\\1:3C3CD>>(A.!1LL(1dD!Q+>"??
SbS#2#.
QR,
/9*/E,
FE61ZZ
Z%QW,XY
 -Z
-CD
\\*<@
zz$//*5q99r(   )Tr   N)rv   re   ru   re   rd   )rh   ri   rj   rk   rB   rO   rc   rl   rm   s   @r&   ro   ro      s6    a
 
h
h 	
h@:r(   ro   c                  <     e Zd ZdZd fd	Z	 ddZd ZddZ xZS )TFEsmEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                   t         |   |       t        j                  j	                  |j
                  |j                  t        |j                        d      | _	        t        j                  j	                  |j                  |j                  t        |j                        d      | _        |j                  r1t        j                  j                  |j                  d      | _        nd | _        t!        |dd      | _        t%        j&                  |j                        d d d f   | _        |j*                  | _        |j.                  | _        |j0                  | _        || _        y )	Nr?   word_embeddings)embeddings_initializerr@   position_embeddings
layer_normepsilonr@   position_embedding_typeabsolute)rA   rB   r   rw   	Embedding
vocab_sizehidden_sizer   initializer_ranger   max_position_embeddingsr   emb_layer_norm_beforeLayerNormalizationlayer_norm_epsr   r|   r   r    rS   position_idspad_token_idpadding_idxtoken_dropoutmask_token_idconfigrD   r   r@   rE   s      r&   rB   zTFEsmEmbeddings.__init__   s-   d#$||55#263K3K#L"	  6  
 $)<<#9#9**#263K3K#L&	 $: $
  ''#ll==fF[F[bn=oDO"DO (/v7PR\']$HHV%C%CDT1WM!..#11#11r(   c                Z   |+|t        || j                  |      }n| j                  |      }|1t        || j                  j
                         | j                  |      }|}| j                  rt        j                  || j                  k(  d d d d d f   d|      }d}t        j                  t        j                  |d      t        j                        }|| j                  k(  }	t        j                  j                  |	t        j                  d      |z  }
|d|z
  z  d|
z
  d d d d f   z  }| j                   dk(  r| j#                  |      }||z  }| j$                  | j%                  |      }|7|t        j                  t        j&                  |d      |j(                        z  }|S )Ng        gQ?r   r   )rI   r   r   r   )"create_position_ids_from_input_idsr   &create_position_ids_from_inputs_embedsr   r   r   r   r   r    wherer   r   r5   rQ   mathcount_nonzeror   r   r   r   rI   )rD   	input_idsattention_maskr   inputs_embedspast_key_values_length
embeddingsmask_ratio_trainsrc_lengthsmasked_tokensmask_ratio_observedr   s               r&   rc   zTFEsmEmbeddings.call   s    $A)TM]M]_uv#JJ=Y *9dkk6L6LM 00;M #
 90B0B#BAq$J"OQTV`aJ)''"--R"H"**UK%););;M"$''"7"7RZZ^`"7"ado"o#q+;';<DW@WYZ\`bfYf?ggJ'':5"&":":<"H--J??&4J%#bggbnn^R.PR\RbRb&ccJ r(   c                   t        |      dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                        }t        j
                  t        j                  |d      |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: tf.Tensor

        Returns: tf.Tensor
        Nr   r   )rL   rM   rI   r   )r   r    rS   r   int64broadcast_tor   )rD   r   rT   sequence_lengthr   s        r&   r   z6TFEsmEmbeddings.create_position_ids_from_inputs_embeds   st     !/4%a.xx""Q&o@P@P.PST.T\^\d\d
 r~~lA>LLr(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   )r{   r|   r    r}   r   r@   rO   r   r   r   r   r~   s     r&   rO   zTFEsmEmbeddings.build  s!   ::
4*D1=t33889 1$$**4014.5At77<<= 5((..t454t,8t334 M%%tT4;;3J3J&KLM M 91 15 5M M$   D<%E?3E<EEErd   )NNNNr   )	rh   ri   rj   rk   rB   rc   r   rO   rl   rm   s   @r&   r   r      s&    > rs+ZM"Mr(   r   c                  j     e Zd Zd fd	ZddZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZd	dZ xZS )
TFEsmSelfAttentionc                *   t         |   |       |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d	      | _        t        j                  j                  | j                  t        |j                        d
      | _        t        j                  j#                  |j$                        | _        |xs t)        |dd      | _        d | _        | j*                  dk(  s| j*                  dk(  rf|j.                  | _        t        j                  j1                  d|j.                  z  dz
  | j                  t        |j                              | _        n+| j*                  dk(  rt5        | j                  d      | _        |j6                  | _        || _        y )Nr?   r   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()querykernel_initializerr@   keyvaluer   r   relative_keyrelative_key_queryr   r   )r   rotaryrotary_embeddings)rC   r@   )rA   rB   r   num_attention_headshasattr
ValueErrorre   attention_head_sizeall_head_sizer   rw   rx   r   r   r   r   r   Dropoutattention_probs_dropout_probdropoutr|   r   r   r   r   distance_embeddingr=   
is_decoderr   )rD   r   r   r@   rE   s       r&   rB   zTFEsmSelfAttention.__init__  s@   d# : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OP'> (
'-zC
$ "&''>9T=Y=Y]q=q+1+I+ID(&+ll&<&<F222Q6(('6v7O7O'P '= 'D#
 ))X5%64;S;SZm%nD" ++r(   c                    t        |      d d | j                  | j                  gz   }t        j                  ||      }t        j
                  |d      S )Nr   r   r   r   r   r   )r   r   r   r    r   r   )rD   r#   new_x_shapes      r&   transpose_for_scoresz'TFEsmSelfAttention.transpose_for_scores@  sI     mCR(D,D,DdF^F^+__JJq+&||AL11r(   c	                   | j                  |      }	|d u}
|
r||d   }|d   }|}n |
rC| j                  | j                  |            }| j                  | j                  |            }|}n|y| j                  | j                  |            }| j                  | j                  |            }t	        j
                  |d   |gd      }t	        j
                  |d   |gd      }n@| j                  | j                  |            }| j                  | j                  |            }| j                  |	      }|| j                  dz  z  }| j                  r||f}| j                  dk(  r| j                  ||      \  }}t	        j                  ||d      }| j                  d	k(  s| j                  d
k(  r7t        |      d   }t	        j                  t	        j                  |t        j                        d      }t	        j                  t	        j                  |t        j                        d      }||z
  }| j                  || j                   z   dz
        }t	        j"                  ||j$                        }| j                  d	k(  rt	        j&                  d||      }||z   }nE| j                  d
k(  r6t	        j&                  d||      }t	        j&                  d||      }||z   |z   }|||z   }t)        |d      }| j+                  ||      }|||z  }||z  }t	        j,                  |d      }t        |      d d | j.                  gz   }t	        j0                  ||      }|r||fn|f}| j                  r||fz   }|S )Nr   r   r   r   g      r   Ttranspose_br   r   rV   r   zbhld,lrd->bhlrzbhrd,lrd->bhlrtrainingr   r   r*   )r   r   r   r   r    r"   r   r   r   r   matmulr   r   rS   r   r   r   r   rI   rW   r   r   r   r   r   )rD   hidden_statesr   	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scores
seq_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                              r&   rc   zTFEsmSelfAttention.callE  st    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@qII))^A%6$D1MK11$((=2IJI33DJJ}4MNK//0AB "D$<$<d$BB?? (5N''83%)%;%;K%S"K 99[)N''>9T=Y=Y]q=q#M215J^^BHHZrxx,PRTUN^^BHHZrxx,PRSTN%6H#'#:#:8dFbFb;bef;f#g #%77+?ARAR#S ++~=+-995E{Th+i(#36N#N --1EE13;K[Zn1o./1yy9I9Vj/k,#36T#TWs#s %/.@ ))9C ,,,J  -	9O'+5]F",]";CR"@DDVDVCW"W

=2IJ6G=/2mM]?? 11Gr(   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   HxY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r   r   r   )r{   r|   r    r}   r   r@   rO   r   r   r   r   r   r~   s     r&   rO   zTFEsmSelfAttention.build  s   ::
4$'3tzz/ H

  $dkk.E.E!FGH4%1txx}}- FdDKK,C,CDEF4$'3tzz/ H

  $dkk.E.E!FGH4,d3?t55::; 3&&,,T23 3 @H HF FH H3 3s0   3G<3G-3GG)GGG&)G2NN)r#   rf   rg   rf   NNNNNFF)r   rf   r   tf.Tensor | Noner   r   r   r   r   r   r   ztuple[tuple[tf.Tensor]] | Noner   bool | Noner   boolrg   ztuple[tf.Tensor]rd   )rh   ri   rj   rB   r   rc   rO   rl   rm   s   @r&   r   r     s    &P2 ,0&*26379=).e e )e $	e
  0e !1e 7e 'e e 
eN3r(   r   c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmSelfOutputc                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                        | _        || _        y Nr?   denser   rA   rB   r   rw   rx   r   r   r   r  r   hidden_dropout_probr   r   r   s      r&   rB   zTFEsmSelfOutput.__init__  l    d#\\''?6C[C[3\cj ( 

 ||++F,F,FGr(   c                X    | j                  |      }| j                  ||      }||z  }|S Nr   r  r   rD   r   input_tensorr   s       r&   rc   zTFEsmSelfOutput.call  2    

=1]XF%r(   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wNTr  	r{   r|   r    r}   r  r@   rO   r   r   r~   s     r&   rO   zTFEsmSelfOutput.build  }    ::
4$'3tzz/ H

  $dkk.E.E!FGH H 4H H   3BBrd   Frh   ri   rj   rB   rc   rO   rl   rm   s   @r&   r   r     s    Hr(   r   c                  D     e Zd Zd fd	Zd Z	 	 	 	 	 	 	 ddZddZ xZS )TFEsmAttentionc                    t         |   |       t        |d      | _        t	        |d      | _        t               | _        t        j                  j                  |j                  d      | _        || _        y )Nr?   rD   output	LayerNormr   )rA   rB   r   rD   r   output_layersetpruned_headsr   rw   r   r   r  r   r   s      r&   rB   zTFEsmAttention.__init__  sc    d#&vF;	+FBE88AVAV]h8ir(   c                    t         rd   NotImplementedError)rD   r   s     r&   prune_headszTFEsmAttention.prune_heads      !!r(   c	           
         | j                  |      }	| j                  |	|||||||      }
| j                  |
d   |      }|f|
dd  z   }|S )Nr   r   )r  rD   r  )rD   r   r   r   r   r   r   r   r   hidden_states_lnself_outputsattention_outputr   s                r&   rc   zTFEsmAttention.call  sk      >>-8yy!"	
  ,,\!_mL#%QR(88r(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTrD   r  r  )r{   r|   r    r}   rD   r@   rO   r  r  r   r   r~   s     r&   rO   zTFEsmAttention.build  s   ::
4&2tyy~~. &		%&4.:t00556 .!!''-.4d+7t~~223 L$$dD$++2I2I%JKL L 8& &. .L Lr   rd   r   )rh   ri   rj   rB   r  rc   rO   rl   rm   s   @r&   r  r    s/    " "#4Lr(   r  c                  0     e Zd Zd fdZddZddZ xZS )TFEsmIntermediatec                    t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        || _	        y )Nr  )unitsr   r@    )
rA   rB   r   rw   rx   intermediate_sizer   r   r  r   rD   r   kwargsrE   s      r&   rB   zTFEsmIntermediate.__init__  sQ    "6"\\''**.v/G/GH ( 


 r(   c                h    | j                  |      }t        j                  j                  |      }|S )Ninputs)r  r    nngelu)rD   r   s     r&   rc   zTFEsmIntermediate.call  s*    

-
8

=1r(   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  r  r~   s     r&   rO   zTFEsmIntermediate.build  r  r  r   r   r   rf   rg   rf   rd   r  rm   s   @r&   r%  r%    s    
Hr(   r%  c                  0     e Zd Zd fd	ZddZddZ xZS )TFEsmOutputc                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                        | _        || _        y r   r  r   s      r&   rB   zTFEsmOutput.__init__%  r  r(   c                X    | j                  |      }| j                  ||      }||z  }|S r  r  r  s       r&   rc   zTFEsmOutput.call-  r
  r(   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  )	r{   r|   r    r}   r  r@   rO   r   r)  r~   s     r&   rO   zTFEsmOutput.build3  s}    ::
4$'3tzz/ N

  $dkk.K.K!LMN N 4N Nr  rd   r  r  rm   s   @r&   r5  r5  $  s    Nr(   r5  c                  >     e Zd Zd fd	Z	 	 	 	 	 	 	 ddZddZ xZS )
TFEsmLayerc                   t         |   |       |j                  | _        d| _        t	        |d      | _        |j                  | _        |j                  | _        | j                  r*| j                  st        |  d      t	        |      | _	        t        |d      | _        t        |d      | _        t        j                  j!                  |j"                  d      | _        || _        y )	Nr?   r   	attentionz> should be used as a decoder model if cross attention is addedintermediater  r  r   )rA   rB   chunk_size_feed_forwardseq_len_dimr  r<  r   add_cross_attentionRuntimeErrorcrossattentionr%  r=  r5  r  r   rw   r   r   r  r   r   s      r&   rB   zTFEsmLayer.__init__=  s    d#'-'E'E$'[A ++#)#=#= ##??"dV+i#jkk"0"8D-f>J'X>88AVAV]h8ir(   c	           
        ||d d nd }	| j                  |||||	|      }
|
d   }| j                  r|
dd }|
d   }n|
dd  }d }| j                  rV|Tt        | d      st        d|  d      ||d	d  nd }| j	                  ||||||||
      }|d   }||dd z   }|d   }|z   }| j                  |      }| j                  |      }| j                  |||      }|f|z   }| j                  r|fz   }|S )Nr   )r   r   r   r   r   r   rB  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r*   r   r   )r   r	  r   )r<  r   r   AttributeErrorrB  r  r=  r  )rD   r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr"  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayernorm_outputintermediate_outputlayer_outputs                       r&   rc   zTFEsmLayer.callM  s    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!12$=dV D` `  @N?Yrs(;_c%&*&9&9 %&)!! ': 	'#  7q9 7" ==G ,C2+F( 14P P>>*:;"//>N/O((-<LW_ ) 
  /G+ ??!2 44Gr(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   1xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr<  r=  r  r  )r{   r|   r    r}   r<  r@   rO   r=  r  r  r   r   r~   s     r&   rO   zTFEsmLayer.build  sk   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4.:t00556 .!!''-.4d+7t~~223 L$$dD$++2I2I%JKL L 8+ +. .. .L Ls0   F%F#?F/3F;F #F,/F8;Grd   r   r  rm   s   @r&   r:  r:  <  s,    & "#DLLr(   r:  c                  D     e Zd Zd fd	Z	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFEsmEncoderc                
   t         |   |       || _        t        |j                        D cg c]  }t        |d|        c}| _        t        j                  j                  |j                  d      | _        y c c}w )Nr?   zlayer_._emb_layer_norm_afterr   )rA   rB   r   rS   num_hidden_layersr:  layerr   rw   r   r   rS  )rD   r   r@   irE   s       r&   rB   zTFEsmEncoder.__init__  ss    d#GLVMeMeGfg!jn=g
$)LL$C$C))0F %D %
! hs   B c                   |	rdnd }|rdnd }|r| j                   j                  rdnd }|rdnd }t        | j                        D ]j  \  }}|	r||fz   }|||   nd }|||   nd } |||||||||      }|d   }|r	||d   fz  }|sB||d   fz   }| j                   j                  sb||d   fz   }l | j                  r| j	                  |      }|	r||fz   }|
st        d |||||fD              S t        |||||      S )Nr(  r   r   r   r   c              3  $   K   | ]  }|| 
 y wrd   r(  ).0vs     r&   	<genexpr>z$TFEsmEncoder.call.<locals>.<genexpr>  s      
 = 
s   )last_hidden_statepast_key_valuesr   r   cross_attentions)r   r@  	enumeraterU  rS  tupler   )rD   r   r   r   r   r   r]  	use_cacher   output_hidden_statesreturn_dictr   all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherV  layer_modulelayer_head_maskr   layer_outputss                        r&   rc   zTFEsmEncoder.call  s    #7BD$5b4%64;;;Z;Zr`d#,R$(4 	VOA|#$58H$H!.7.CilO3B3N_Q/TXN(%&!	M *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(1	V4 $$ 55mDM 1]4D D 
 "&%'(
 
 
 ;+.+*1
 	
r(   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   bxY w# 1 sw Y   UxY w)NTrS  rU  )
r{   r|   r    r}   rS  r@   rO   r   r   rU  )rD   rT   rU  s      r&   rO   zTFEsmEncoder.build  s    ::
4/6Bt88==> W))//tT[[=T=T0UVW4$'3 &]]5::. &KK%& && 4W W& &s   3CC+C(+C4	rd   )
NNNNNNFFTFr  rm   s   @r&   rQ  rQ    s4    
 "#"E
N
&r(   rQ  c                  0     e Zd Zd fdZddZddZ xZS )TFEsmPoolerc                    t        |   di | t        j                  j	                  |j
                  t        |j                        dd      | _        || _	        y )Ntanhr  )r'  r   rt   r@   r(  )
rA   rB   r   rw   rx   r   r   r   r  r   r*  s      r&   rB   zTFEsmPooler.__init__  sT    "6"\\''$$.v/G/GH	 ( 

 r(   c                <    |d d df   }| j                  |      }|S )Nr   r-  )r  )rD   r   first_token_tensorpooled_outputs       r&   rc   zTFEsmPooler.call  s*     +1a40

*<
=r(   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wr  r  r~   s     r&   rO   zTFEsmPooler.build  r  r  r2  r3  rd   r  rm   s   @r&   rm  rm    s    	Hr(   rm  c                      e Zd ZdZeZdZy)TFEsmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    esmN)rh   ri   rj   rk   r   config_classbase_model_prefixr(  r(   r&   ru  ru     s    
 Lr(   ru  a2  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a Keras [Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it as a
    regular Keras model and refer to the TF/Keras documentation for all matters related to general usage and behavior.

    Parameters:
        config ([`EsmConfig`]): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        position_ids (`tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
z]The bare ESM Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZdZdgZd
 fd	ZddZd ZddZd Z		 	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ
d	 Z xZS )TFEsmMainLayera  

    The model can behave as an encoder (with only self-attention) as well as a decoder, in which case a layer of
    cross-attention is added between the self-attention layers, following the architecture described in [Attention is
    all you need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit,
    Llion Jones, Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.

    To behave as an decoder the model needs to be initialized with the `is_decoder` argument of the configuration set
    to `True`. To be used in a Seq2Seq model, the model needs to initialized with both `is_decoder` argument and
    `add_cross_attention` set to `True`; an `encoder_hidden_states` is then expected as an input to the forward pass.
    r   c                H   t        |   d	d|i| || _        |j                  | _        t	        |d      | _        t        |d      | _        |rt        |d      nd | _	        t        | j                  j                  | j                  j                  z  dd      | _        y )
Nr@   r   r?   encoderpoolerTcontact_head)rv   ry   r@   r(  )rA   rB   r   r   r   r   rQ  r|  rm  r}  ro   rT  r   r~  )rD   r   add_pooling_layerr@   r+  rE   s        r&   rB   zTFEsmMainLayer.__init__v  s    -d-f- ++)&|D#F;<Mk&x8SW6558W8WW^biw
r(   c                `   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r|  r}  r~  )
r{   r|   r    r}   r   r@   rO   r|  r}  r~  r~   s     r&   rO   zTFEsmMainLayer.build  sQ   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()44(4t{{//0 (!!$'(4.:t00556 .!!''-. . ;, ,) )( (. .s0   E?%F?FF$?F	FF!$F-c                .    | j                   j                  S rd   )r   r   rD   s    r&   get_input_embeddingsz#TFEsmMainLayer.get_input_embeddings  s    ...r(   c                t    || j                   j                  _        t        |      d   | j                   _        y )Nr   )r   r   weightr   r   )rD   r   s     r&   set_input_embeddingsz#TFEsmMainLayer.set_input_embeddings  s*    16''.%/%6q%9"r(   c                    t         rd   r  )rD   heads_to_prunes     r&   _prune_headszTFEsmMainLayer._prune_heads  r  r(   c                   | j                   j                  sd}	||t        d      |t        |      }n|t        |      d d }nt        d      |\  }}|&d}d gt	        | j
                  j                        z  }nt        |d   d         d   }|t        j                  |||z   fd      }| j                  ||||||	      }t        |      }||z   }| j                  rt        j                  |      }t        j                  t        j                  |d d d d f   ||df      |d d d d f         }t        j                  ||j                  
      }||d d d d d f   z  }t        |      }t        j                  ||d   d|d   |d   f      }|d   3|d d d d | d d d f   }n t        j                  ||d   dd|d   f      }t        j                  ||j                  
      }t        j                   d|j                  
      }t        j                   d|j                  
      }t        j"                  t        j$                  ||      |      }| j                  rf|dt        j                  ||j                  
      }t	        t        |            }|dk(  r|d d d d d d d f   }|dk(  r|d d d d d d f   }dz
  dz  }nd }|t&        d g| j                   j(                  z  }| j                  |||||||	|
|||      }|d   }| j*                  | j+                  |      nd }|s
||f|dd  z   S t-        |||j.                  |j0                  |j2                  |j4                        S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr   r*   r   )dimsr   )r   r   r   r   r   r   rV   r   rH   g     r   )r   r   r   r   r   r]  ra  r   rb  rc  r   rD  )r\  pooler_outputr]  r   r   r^  )r   r   r   r   lenr|  rU  r    fillr   rS   
less_equaltiler   rI   r   constantmultiplysubtractr  rT  r}  r	   r]  r   r   r^  )rD   r   r   r   r   r   r   r   r]  ra  r   rb  rc  r   rT   r   r   r   embedding_outputattention_mask_shapemask_seq_lengthseq_idscausal_maskextended_attention_maskone_cstten_thousand_cstnum_dims_encoder_attention_maskencoder_extended_attention_maskencoder_outputssequence_outputrr  s                                  r&   rc   zTFEsmMainLayer.call  s     {{%%I ]%>cdd"$Y/K&$]3CR8KTUU!,
J"%&"#fs4<<+=+='>>O%/0B10E%Fr%J"!WW:zDZ7Z*[cdeN??)%'#9 + 
  *.9$'==
 ??hh/G--dA._a0PQa&K ''+^5I5IJK&1N1dA:4N&N##-.E#F &(jj'*>q*A1FZ[\F]_stu_v)w'# q!-*A!QVWBW*X'&(jj!5a!8!Q@TUV@W X'# #%''*AIYI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'CZ.[]m"n ??5A &(WW-CKbKhKh%i".1*=S2T.U+.!32HDRSUV2W/.!32HDRVXYIY2Z/ 035T/TX`.`+.2+  %%!>!>>I,,*2"7#B+/!5# ' 
 *!,FJkkF]/Bcg  #$ $
 >-'+;;)77&11,==
 	
r(   c                     | ||dd      j                   }t        j                  |d      }t        j                  ||j                        }||d d d d d f   z  }||d d d d d d d f   z  }| j                  ||      S )NT)r   rc  r   r   r   )r   r    stackr   rI   r~  )rD   r   r   attnss       r&   predict_contactszTFEsmMainLayer.predict_contacts8  s    VN`deppQ'
 =4t 3444q$ 677  //r(   )TNrd   )r   ztf.VariableNNNNNNNNNNNNF)r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   r  r]  +tuple[tuple[np.ndarray | tf.Tensor]] | Nonera  r   r   r   rb  r   rc  r   r   r   rg   ATFBaseModelOutputWithPoolingAndCrossAttentions | tuple[tf.Tensor])rh   ri   rj   rk   _keys_to_ignore_on_load_missingrB   rO   r  r  r  rc   r  rl   rm   s   @r&   rz  rz  c  s    

 (7&7#
."/:"
 .28<6:377;?C@DGK!%)-,0#'W
*W
 6W
 4	W

 1W
 5W
  =W
 !>W
 EW
 W
 'W
 *W
 !W
 W
 
KW
r
0r(   rz  c                       e Zd Zdd fdZe eej                  d             ee	e
e      	 	 	 	 	 	 	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
d                     Zd ZddZ xZS )
TFEsmModelc                R    t        |   |g|i | t        ||d      | _        y )Nrv  r  r@   )rA   rB   rz  rv  )rD   r   r  r.  r+  rE   s        r&   rB   zTFEsmModel.__init__J  s,    3&3F3!&<MTYZr(   batch_size, sequence_length
checkpointoutput_typerw  c                B    | j                  |||||||||	|
|||      }|S )a  
        encoder_hidden_states  (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
            the model is configured as a decoder.
        encoder_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
            the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        past_key_values (`tuple[tuple[tf.Tensor]]` of length `config.n_layers`)
            contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding.
            If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
            don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
            `decoder_input_ids` of shape `(batch_size, sequence_length)`.
        use_cache (`bool`, *optional*, defaults to `True`):
            If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
            `past_key_values`). Set to `False` during training, `True` during generation
        )r   r   r   r   r   r   r   r]  ra  r   rb  rc  r   )rv  )rD   r   r   r   r   r   r   r   r]  ra  r   rb  rc  r   r   s                  r&   rc   zTFEsmModel.callO  sF    V (()%'"7#9+/!5#  
 r(   c                :    | j                   j                  ||      S rd   rv  r  rD   r   r   s      r&   r  zTFEsmModel.predict_contacts      xx((@@r(   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTrv  )r{   r|   r    r}   rv  r@   rO   r~   s     r&   rO   zTFEsmModel.build  se    ::
4%1txx}}- %t$% % 2% %s   A11A:)Tr2  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  r]  r  ra  r   r   r   rb  r   rc  r   r   r   rg   r  rd   )rh   ri   rj   rB   r   r   ESM_INPUTS_DOCSTRINGformatr   _CHECKPOINT_FOR_DOCr	   _CONFIG_FOR_DOCrc   r  rO   rl   rm   s   @r&   r  r  E  s   
[
 *+?+F+FGd+ef&B$ .28<6:377;?C@DGK!%)-,0#' %3*3 63 4	3
 13 53  =3 !>3 E3 3 '3 *3 !3 3 
K3 g 3jA%r(   r  z1ESM Model with a `language modeling` head on top.c                       e Zd ZdgZdgZ fdZd Zd Zd Ze	 e
ej                  d             eeeed	      	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd
                     Zd ZddZ xZS )TFEsmForMaskedLMr   r}  c                P   t         |   |       |j                  rt        j	                  d       t        |dd      | _        t        |d      | _        |j                  rt        j                  t        j                  j                  | j                         ddd            5  | j                  j                   j"                  j%                  d	       d d d        | j                  j                   j"                  j&                  d
   | j                  _        y y # 1 sw Y   GxY w)NzjIf you want to use `EsmForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.Frv  r  lm_headr?   r   r   r   r   )rA   rB   r   loggerwarningrz  rv  TFEsmLMHeadr  tie_word_embeddingsr    r}   ospathjoin_name_scoper   r   rO   weightsdecoderrD   r   rE   s     r&   rB   zTFEsmForMaskedLM.__init__  s     NN1
 "&EN"6	:%%rww||D,<,<,>|Ufgh H##3399,GH#'88#6#6#F#F#N#Nq#QDLL 	 &H Hs   &0DD%c                .    | j                   j                  S rd   r  r  r  s    r&   get_output_embeddingsz&TFEsmForMaskedLM.get_output_embeddings  s    ||###r(   c                &    || j                   _        y rd   r  )rD   new_embeddingss     r&   set_output_embeddingsz&TFEsmForMaskedLM.set_output_embeddings  s    -r(   c                    | j                   S rd   )r  r  s    r&   get_lm_headzTFEsmForMaskedLM.get_lm_head  s    ||r(   r  z<mask>)r  r  rw  maskc                8   ||n| j                   j                  }| j                  ||||||||	|
||      }|d   }| j                  |      }d}|| j	                  ||      }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a!  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        kwargs (`dict[str, any]`, *optional*, defaults to `{}`):
            Used to hide legacy arguments that have been deprecated.
        N)
r   r   r   r   r   r   r   rb  rc  r   r   )labelslogitsr   lossr  r   r   )r   use_return_dictrv  r  hf_compute_lossr
   r   r   )rD   r   r   r   r   r   r   r   r  r   rb  rc  r   r   r  prediction_scoresmasked_lm_lossr  s                     r&   rc   zTFEsmForMaskedLM.call  s    > &1%<k$++B]B](()%'"7#9/!5#  
 "!* LL9!11HY1ZN')GABK7F3A3M^%.YSYY$!//))	
 	
r(   c                :    | j                   j                  ||      S rd   r  r  s      r&   r  z!TFEsmForMaskedLM.predict_contacts  r  r(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTrv  r  )r{   r|   r    r}   rv  r@   rO   r  r~   s     r&   rO   zTFEsmForMaskedLM.build  s    ::
4%1txx}}- %t$%4D)5t||001 )""4() ) 6% %) )   C%CCC )NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  r  r  r   r   rb  r   rc  r   r   r   rg   z#TFMaskedLMOutput | tuple[tf.Tensor]rd   )rh   ri   rj   r  "_keys_to_ignore_on_load_unexpectedrB   r  r  r  r   r   r  r  r   r  r
   r  rc   r  rO   rl   rm   s   @r&   r  r    s#   '6&7#*3&R"$. *+?+F+FGd+ef&$$	 .28<6:377;?C@D04)-,0#'6
*6
 66
 4	6

 16
 56
  =6
 !>6
 .6
 '6
 *6
 !6
 6
 
-6
 g 6
pA	)r(   r  c                  8     e Zd ZdZd fd	ZddZd Zd Z xZS )r  z&ESM Head for masked language modeling.c                   t         |   |       t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                  d      | _        |j                  rd | _        || _        y t        j                  j	                  |j                  t        |j                        dd      | _        || _        y )	Nr?   r  r   r   r   r  F)r   r@   rs   )rA   rB   r   rw   rx   r   r   r   r  r   r   r   r  r  r   r   r   s      r&   rB   zTFEsmLMHead.__init__  s    d#\\''?6C[C[3\cj ( 

  ,,99&BWBW^j9k%%DL  !<<--!!#263K3K#L	 . DL r(   c                   | j                   ry d| _         | j                  d| j                  j                  fdd      | _        t        | dd       dt        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       || j                  j                  set        j                  | j                  j                        5  | j                  j                  d d | j                  j                  g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTry   zeros)r+   rJ   rK   r  r   r  )r{   rP   r   r   ry   r|   r    r}   r  r@   rO   r   r   r  r  r~   s     r&   rO   zTFEsmLMHead.build  sx    ::
OOF4;;3I3I2KY`lpOq	4$'3tzz/ H

  $dkk.E.E!FGH4t,8t334 M%%tT4;;3J3J&KLM4D)5dkk>]>]t||001 J""D$0G0G#HIJ J ?^5H HM MJ Js$   :3F0+3F=23G	0F:=G	Gc                    d| j                   iS )Nry   )ry   r  s    r&   get_biaszTFEsmLMHead.get_bias+  s    		""r(   c                R   | j                  |      }t        j                  j                  |      }| j	                  |      }| j
                  j                  r1t        j                  || j                  d      | j                  z   }|S | j                  |      | j                  z   }|S )NTr   )
r  r    r/  r0  r   r   r  r   r  ry   )rD   featuresr#   s      r&   rc   zTFEsmLMHead.call.  s    JJx EEJJqMOOA ;;**		!T\\t<tyyHA  Q$))+Ar(   rd   )	rh   ri   rj   rk   rB   rO   r  rc   rl   rm   s   @r&   r  r    s    0$J"#
r(   r  z
    ESM Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    c                       e Zd ZdgZ fdZe eej                  d             e	e
ee      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
TFEsmForSequenceClassificationr   c                    t         |   |       |j                  | _        || _        t	        |dd      | _        t        |d      | _        y NFrv  r  
classifierr?   )rA   rB   
num_labelsr   rz  rv  TFEsmClassificationHeadr  r  s     r&   rB   z'TFEsmForSequenceClassification.__init__E  sB      ++!&EN1&|Lr(   r  r  c                2   |	|	n| j                   j                  }	| j                  ||||||||	|
	      }|d   }| j                  |      }|dn| j	                  ||      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   r   r   rb  rc  r   r   r   r  )r   r  rv  r  r  r   r   r   rD   r   r   r   r   r   r  r   rb  rc  r   r   r  r  r  r  s                   r&   rc   z#TFEsmForSequenceClassification.callM  s    4 &1%<k$++B]B](()%'/!5#  

 "!*1~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
r(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wNTrv  r  )r{   r|   r    r}   rv  r@   rO   r  r~   s     r&   rO   z$TFEsmForSequenceClassification.build  s    ::
4%1txx}}- %t$%4t,8t334 ,%%d+, , 9% %, ,r  
NNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r  r  r   r   rb  r   rc  r   r   r   rg   z-TFSequenceClassifierOutput | tuple[tf.Tensor]rd   )rh   ri   rj   r  rB   r   r   r  r  r   r  r   r  rc   rO   rl   rm   s   @r&   r  r  ;  s     (7&7#M *+?+F+FGd+ef&.$ .28<6:377;04)-,0#'.
*.
 6.
 4	.

 1.
 5.
 ..
 '.
 *.
 !.
 .
 
7.
 g .
`	,r(   r  z
    ESM Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       e Zd ZdgZdgZ fdZe eej                  d             e
eee      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFEsmForTokenClassificationr}  r   c                6   t         |   |       |j                  | _        t        |dd      | _        t
        j                  j                  |j                        | _	        t
        j                  j                  |j                  d      | _        || _        y r  )rA   rB   r  rz  rv  r   rw   r   r  r   rx   r  r   r  s     r&   rB   z$TFEsmForTokenClassification.__init__  sq      ++!&EN||++F,F,FG,,,,V->->\,Rr(   r  r  c                X   |	|	n| j                   j                  }	| j                  ||||||||	|
	      }|d   }| j                  ||
      }| j	                  |      }|dn| j                  ||      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   r   r  )	r   r  rv  r   r  r  r   r   r   r  s                   r&   rc   z TFEsmForTokenClassification.call  s    0 &1%<k$++B]B](()%'/!5#  

 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
r(   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  )
r{   r|   r    r}   rv  r@   rO   r  r   r   r~   s     r&   rO   z!TFEsmForTokenClassification.build  s    ::
4%1txx}}- %t$%4t,8t334 M%%tT4;;3J3J&KLM M 9% %M Ms   C"%3C."C+.C7r  )r   r  r   r  r   r  r   r  r   r  r  r  r   r   rb  r   rc  r   r   r   rg   z*TFTokenClassifierOutput | tuple[tf.Tensor]rd   )rh   ri   rj   r  r  rB   r   r   r  r  r   r  r   r  rc   rO   rl   rm   s   @r&   r  r    s     +4&'6&7# *+?+F+FGd+ef&+$ .28<6:377;04)-,0#'/
*/
 6/
 4	/

 1/
 5/
 ./
 '/
 */
 !/
 /
 
4/
 g /
b	Mr(   r  c                  4     e Zd ZdZd fd	ZddZddZ xZS )r  z-Head for sentence-level classification tasks.c                   t         |   |       t        j                  j	                  |j
                  t        |j                        dd      | _        t        j                  j                  |j                        | _        t        j                  j	                  |j                  t        |j                        dd      | _        || _        y )Nr?   ro  r  )r   rt   r@   linearout_proj)rA   rB   r   rw   rx   r   r   r   r  r   r  r   r  r  r   r   s      r&   rB   z TFEsmClassificationHead.__init__  s    d#\\''.v/G/GH	 ( 

 ||++F,F,FG**.v/G/GH	 + 
 r(   c                    |d d dd d f   }| j                  ||      }| j                  |      }| j                  ||      }| j                  |      }|S )Nr   r   )r   r  r  )rD   r  r   r#   s       r&   rc   zTFEsmClassificationHead.call  sV    Q1WLLXL.JJqMLLXL.MM!r(   c                "   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY w)NTr  r  )
r{   r|   r    r}   r  r@   rO   r   r   r  r~   s     r&   rO   zTFEsmClassificationHead.build  s    ::
4$'3tzz/ H

  $dkk.E.E!FGH4T*6t}}112 K##T41H1H$IJK K 7H HK Ks   3C9<3D9DDrd   r  )rh   ri   rj   rk   rB   rc   rO   rl   rm   s   @r&   r  r    s    7"	Kr(   r  c                    t        j                  | |k7  t         j                        }t        j                  |d      |z   |z  }||z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: tf.Tensor x:

    Returns: tf.Tensor
    r   r   )r    r   r   cumsum)r   r   r   r  incremental_indicess        r&   r   r     sD     779+RXX6D99T25KKtS,,r(   )r  r  r  r  ru  )r   )Erk   
__future__r   r  numpynp
tensorflowr    
file_utilsr   r   r   modeling_tf_outputsr   r	   r
   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   tf_utilsr   r   utilsr   configuration_esmr   
get_loggerrh   r  r  r  r'   r.   r2   r;   rw   Layerr=   ro   r   r   r   r  r%  r5  r:  rQ  rm  ru  ESM_START_DOCSTRINGr  rz  r  r  r  r  r  r  r   __all__r(  r(   r&   <module>r     s    " 	   q q 
 
 
 G  ( 
		H	%1 )
.-
	)
** )
X%:!3!3 %:PmMell(( mM`d3++ d3NHell(( H02LU\\'' 2LjH** H2N%,,$$ N0fL## fLRZ&5<<%% Z&|H%,,$$ H:,  ' T c[0U\\'' [0	[0| cK%% K%	K%\ MObcj)+-I j) dj)Z3%,,$$ 3l  K,%9;W K,K,\  NM"68Q NMNMb%Kell00 %KP- r(   