
    rh?                     :   d dl Z d dlmZ d dlmZmZ d dlZd dlZd dlmZm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZmZmZ ddlmZmZmZ ddlmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3  ejh                  e5      Z6 G d de"      Z7d Z8d Z9 G d de	jt                        Z; G d de(e	jt                        Z< G d de)      Z= G d de!      Z> G d de$      Z? G d  d!e&      Z@ G d" d#e%      ZA G d$ d%e#      ZB G d& d'e'      ZCe G d( d)e             ZD G d* d+eD      ZE G d, d-e	jt                        ZF G d. d/e	jt                        ZG G d0 d1e	jt                        ZHee G d2 d3e                    ZI G d4 d5e	jt                        ZJ G d6 d7e	jt                        ZK G d8 d9e/      ZL G d: d;e0      ZM G d< d=e-      ZN G d> d?e+      ZO G d@ dAe,      ZP G dB dCe.      ZQ G dD dEeQ      ZR G dF dGeQe      ZSg dHZTy)I    N)	dataclass)OptionalUnion)Tensornn   )CacheDynamicCache)GenerationMixin)create_causal_mask)BaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ModuleUtilsMixinPreTrainedModelget_parameter_dtype)auto_docstringcan_return_tuplelogging)check_model_inputs   )	EsmAttentionEsmEmbeddings
EsmEncoderEsmIntermediateEsmLayer	EsmOutput	EsmPoolerEsmSelfAttentionEsmSelfOutput)LlamaAttentionLlamaDecoderLayerLlamaMLPLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbedding   )EvollaConfigSaProtConfigc                        e Zd Z fdZ xZS )EvollaSaProtEmbeddingsc                 0    t         |           d | _        y N)super__init__position_idsselfconfig	__class__s     |/var/www/html/ai-insurance-compliance-backend/venv/lib/python3.12/site-packages/transformers/models/evolla/modular_evolla.pyr0   zEvollaSaProtEmbeddings.__init__B   s         )__name__
__module____qualname__r0   __classcell__r5   s   @r6   r,   r,   A   s    ! !r7   r,   c                 b    | j                  dd      \  }}t        j                  | |fd      S )Nr   dim)chunktorchcat)xx1x2s      r6   rotate_half_esmrG   H   s/    WWQBWFB99rc2YB''r7   c                     |d d d d d | j                   d   d d f   }|d d d d d | j                   d   d d f   }| |z  t        |       |z  z   S )N)shaperG   )rD   cossins      r6   apply_rotary_pos_emb_esmrM   M   sY    
aMaggbkM1$
%C
aMaggbkM1$
%CG*S011r7   c                        e Zd ZdZdef fdZd	dZdej                  dej                  de	ej                  ej                  f   fdZ
 xZS )
EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    r@   c                     t         |           ddt        j                  d|dt        j                        j                         |z  z  z  }|}| j                  d|       d | _        d | _        d | _	        y )N      ?i'  r   r   dtypeinv_freq)
r/   r0   rB   arangeint64floatregister_buffer_seq_len_cached_cos_cached_sin_cached)r3   r@   rT   r5   s      r6   r0   z$EvollaSaProtRotaryEmbedding.__init__[   sl    %ELLC%++$N$T$T$VY\$\]^Z2#r7   c                 t   |j                   |   }|| j                  k7  s#| j                  j                  |j                  k7  r|| _        t	        j
                  |j                   |   |j                        j                  | j                        }t	        j                  || j                        }t	        j                  ||fd      j                  |j                        }|j                         d d d d d d f   | _        |j                         d d d d d d f   | _        | j                  | j                  fS )Ndevicer>   r?   )rJ   rY   rZ   r^   rB   rU   type_asrT   outerrC   torK   rL   r[   )r3   rD   seq_dimensionseq_lentfreqsembs          r6   _update_cos_sin_tablesz2EvollaSaProtRotaryEmbedding._update_cos_sin_tablesf   s    ''-( d***d.>.>.E.E.Q#*D QWW]3AHHEMMdmm\AKK4==1E))UEN366qxx@C"wwytQ)9:D"wwytQ)9:D!1!111r7   qkreturnc                     | j                  |d      \  | _        | _        t        || j                  | j                        t        || j                  | j                        fS )NrI   )rb   )rg   rZ   r[   rM   )r3   rh   ri   s      r6   forwardz#EvollaSaProtRotaryEmbedding.forwardv   s_    -1-H-HZ\-H-]*$* %Q(8(8$:J:JK$Q(8(8$:J:JK
 	
r7   )r   )r8   r9   r:   __doc__intr0   rg   rB   r   tuplerl   r;   r<   s   @r6   rO   rO   T   sM    	 C 	 2 
 
%,, 
5u||A[;\ 
r7   rO   c                       e Zd ZddZy)EvollaSaProtSelfAttentionNc                    t         j                  j                          || _        |j                  |j
                  z  dk7  r2t        |d      s&t        d|j                   d|j
                   d      |j
                  | _        t        |j                  |j
                  z        | _	        | j
                  | j                  z  | _
        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                         | _        |xs t%        |dd      | _        d | _        | j&                  dk(  s| j&                  d	k(  rG|j*                  | _        t        j,                  d
|j*                  z  dz
  | j                        | _        n*| j&                  dk(  rt1        | j                        | _        |j2                  | _        || _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_queryr   r(   rotaryr?   )r   Moduler0   r4   hidden_sizenum_attention_headshasattr
ValueErrorrn   attention_head_sizeall_head_sizeLinearquerykeyvalueDropoutattention_probs_dropout_probdropoutgetattrru   rotary_embeddingsmax_position_embeddings	Embeddingdistance_embeddingrO   
is_decoder	layer_idx)r3   r4   ru   r   s       r6   r0   z"EvollaSaProtSelfAttention.__init__   s   
		 : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ "&''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD#))X5%@TE]E]%^D" ++"r7   NN)r8   r9   r:   r0    r7   r6   rq   rq      s    #r7   rq   c                       e Zd Zy)EvollaSaProtSelfOutputNr8   r9   r:   r   r7   r6   r   r          r7   r   c                       e Zd Zy)EvollaSaProtAttentionNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)EvollaSaProtIntermediateNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)EvollaSaProtOutputNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)EvollaSaProtLayerNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)EvollaSaProtEncoderNr   r   r7   r6   r   r      r   r7   r   c                       e Zd Zy)EvollaSaProtPoolerNr   r   r7   r6   r   r      r   r7   r   c                   (    e Zd ZU eed<   dgZdZd Zy)EvollaSaProtPreTrainedModelr4   r   Tc                    | j                   j                  }t        |t        j                        rY|j
                  j                  j                  d|       |j                  %|j                  j                  j                          yyt        |t        j                        rf|j
                  j                  j                  d|       |j                  2|j
                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j
                  j                  j                  d       yy)zInitialize the weights        meanstdNrQ   )r4   initializer_range
isinstancer   r   weightdatanormal_biaszero_r   padding_idx	LayerNormfill_r3   moduler   s      r6   _init_weightsz)EvollaSaProtPreTrainedModel._init_weights   s    kk++fbii(MM&&CS&9{{&  &&( '-MM&&CS&9!!-""6#5#56<<> .-KK""$MM$$S) .r7   N)r8   r9   r:   r*   __annotations___no_split_modules_supports_flash_attnr   r   r7   r6   r   r      s    ,-*r7   r   c                        e Zd Zdef fdZd Zd Zd Ze	 dde	e
j                     de	e
j                     deee
j                     ef   fd	       Z	 dded
ee   de
j"                  de
j$                  def
dZ xZS )EvollaSaProtProteinEncoderr4   c                 d    t         |   |       t        |      | _        t	        |      | _        y r.   )r/   r0   r,   
embeddingsr   encoderr2   s     r6   r0   z#EvollaSaProtProteinEncoder.__init__   s(     08*62r7   c                 .    | j                   j                  S r.   r   word_embeddingsr3   s    r6   get_input_embeddingsz/EvollaSaProtProteinEncoder.get_input_embeddings   s    ...r7   c                 &    || j                   _        y r.   r   r3   r   s     r6   set_input_embeddingsz/EvollaSaProtProteinEncoder.set_input_embeddings   s    */'r7   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr   layer	attentionprune_heads)r3   heads_to_pruner   headss       r6   _prune_headsz'EvollaSaProtProteinEncoder._prune_heads   sE    
 +002 	CLE5LLu%//;;EB	Cr7   	input_idsattention_maskrj   c                 N   |j                         }|\  }}|j                  }|t        j                  ||f|      }| j	                  ||      }| j                  ||      }| j                  ||      }	|	d   }
t        |
|	j                  |	j                  |	j                        S )Nr]   r   r   )r   r   )last_hidden_statehidden_states
attentionscross_attentions)sizer^   rB   onesr   get_extended_attention_maskr   r   r   r   r   )r3   r   r   input_shape
batch_size
seq_lengthr^   inputs_embedsextended_attention_maskencoder_outputssequence_outputs              r6   rl   z"EvollaSaProtProteinEncoder.forward   s      nn&!,
J!!!"ZZ*j)A6RN)N["&"B"B>S^"_,,}E\,])!,;-)77&11,==	
 	
r7   r   r^   rS   c                 4   |t        |       }|j                         dk(  r| j                  j                  s|t	        j
                  dt               |j                         dk(  r|dddddddf   }nk|j                         dk(  r<| j                  j                  rt        j                  |||      }n*|ddddddf   }nt        d| d|j                   d      |j                  |      }d	|z
  t        j                  |      j                  z  }|S )
a  
        Makes broadcastable attention and causal masks so that future and masked tokens are ignored.

        Arguments:
            attention_mask (`torch.Tensor`):
                Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
            input_shape (`Tuple[int]`):
                The shape of the input to the model.

        Returns:
            `torch.Tensor` The extended attention mask, with a the same dtype as `attention_mask.dtype`.
        Nr   zNThe `device` argument is deprecated and will be removed in v5 of Transformers.r   z!Wrong shape for input_ids (shape z) or attention_mask (shape rt   rR   rQ   )r   r@   r4   r   warningswarnFutureWarningr   *create_extended_attention_mask_for_decoderr~   rJ   ra   rB   finfomin)r3   r   r   r^   rS   r   s         r6   r   z6EvollaSaProtProteinEncoder.get_extended_attention_mask   s"    ='-E""$)dkk.D.D!dfs
 1$&4Qa]&C#!Q& {{%%*:*e*e+' +9D$9I*J'3K=@[\j\p\p[qqrs  #:"<"<5"<"I#&)@#@EKKPUDVDZDZ"Z&&r7   r.   r   )r8   r9   r:   r*   r0   r   r   r   r   r   rB   r   r   ro   r   rl   rn   r^   rW   r   r;   r<   s   @r6   r   r      s    3| 3
/0C  26
ELL)
 !.
 
uU\\"$PP	Q	
 
2 rv2'$2'38:2'GL||2'chcncn2'	2'r7   r   c                   &     e Zd Zd fd	Zd Z xZS )!EvollaSequenceCompressorAttentionc                 j   t         |           |dz  | _        || _        ||z  }t	        j
                  |      | _        t	        j
                  |      | _        t	        j                  ||d      | _	        t	        j                  ||dz  d      | _
        t	        j                  ||d      | _        y )N      Fr   r   )r/   r0   scaler   r   r   
norm_medianorm_latentsr   to_qto_kvto_out)r3   r@   dim_headr   	inner_dimr5   s        r6   r0   z*EvollaSequenceCompressorAttention.__init__6  s    t^

u$	,,s+LL-IIc959	YYsIM>
ii	3U;r7   c                 F   | j                  |      }| j                  |      }| j                  }| j                  |      }t	        j
                  ||fd      }| j                  |      j                  dd      \  }}|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|| j                  z  }t	        j                  ||j                  dd            }	|	|	j                  dd	      j                         z
  }	|	j                   \  }
}}}t	        j"                  ||      j%                  |j&                        }|d
d
d
d
d
d
f   }|d
d
d
d
d
d
f   }||z  }|	j)                  d|z
  j+                         d      }	|	j-                  d      }t	        j                  ||      }|j                  dddd      }|j/                  |j                  d      |j                  d      d      }| j1                  |      S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        rI   r?   r   r>   r   r(   r   Tr@   keepdimNg     )r   r   r   r   rB   rC   r   rA   viewr   permuter   matmul	transposeamaxdetachrJ   r   ra   r^   masked_fillboolsoftmaxreshaper   )r3   rD   latentsmaskhrh   kv_inputri   vsimbsnhskdokdr   mask_expones_expattnouts                      r6   rl   z)EvollaSequenceCompressorAttention.forwardC  sB    OOA##G,JJIIg99a\r2zz(#))2 * 
1 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1akk"b12CHHTH299;;99BSzz"c"%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r7   )@      r8   r9   r:   r0   rl   r;   r<   s   @r6   r   r   5  s    <) r7   r   c                   &     e Zd Zd fd	Zd Z xZS )EvollaFeedForwardc                    t         |           t        ||z        }t        j                  |      | _        t        j                  ||d      | _        t        j                         | _	        t        j                  ||d      | _
        y NFr   )r/   r0   rn   r   r   normr   fc1GELU
activationfc2)r3   r@   multr   r5   s       r6   r0   zEvollaFeedForward.__init__p  s`    d
O	LL%	99S)%8'')99Y%8r7   c           	      ~    | j                  | j                  | j                  | j                  |                        S r.   )r  r  r  r  )r3   rD   s     r6   rl   zEvollaFeedForward.forwardy  s+    xx1(>?@@r7   )   r  r<   s   @r6   r  r  o  s    9Ar7   r  c                   *     e Zd Zdef fdZd Z xZS )!EvollaSequenceCompressorResamplerr4   c           
         t         |           |j                  j                  }|j                  | _        t        j                  t        j                  | j
                  |      d      | _
        t        j                  g       | _        t        |j                        D ]g  }| j                  j                  t        j                  t!        ||j"                  |j$                        t'        ||j(                        g             i t        j*                  |j                        | _        t        j.                  ||j                        | _        y )NT)requires_grad)r@   r   r   )r@   r  )r/   r0   protein_encoder_configr{   resampler_num_latentsnum_latentsr   	ParameterrB   randnr   
ModuleListlayersrangeresampler_depthappendr   resampler_dim_headresampler_headsr  resampler_ff_multr   r  r   protein_projector)r3   r4   protein_repr_dim_r5   s       r6   r0   z*EvollaSequenceCompressorResampler.__init__~  s   !88DD!77||EKK0@0@BR$ScghmmB'v--. 
	AKK9 06;T;T\b\r\r *.>VE]E]^		
	 LL!3!34	!#+;V=O=O!Pr7   c                 j   |j                   d   }|j                   \  }}t        j                  || j                        j	                  |j
                        }t        j                  ||fd      }t        j                  |      j	                  | j                  j
                        }| j                  d    |j                  ddd      z  }|j	                  |j                        }| j                  D ]  \  }	}
 |	|||      |z   } |
|      |z   } | j                  |      }| j                  |      S )Nr   r(   r?   r>   )rJ   rB   r   r#  ra   r^   rC   r   r   rS   r'  r.  r  )r3   embedsr  br  r0  latent_maskr   r   r  fftransformed_features               r6   rl   z)EvollaSequenceCompressorResampler.forward  s   LLO

AjjT%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\* 	,HD"67D1G;GkG+G	, #44W=yy,--r7   )r8   r9   r:   r)   r0   rl   r;   r<   s   @r6   r  r  }  s    Q| Q*.r7   r  c                       e Zd ZU dZej
                  ed<   dZeej
                     ed<   dZ	ee
ej
                  df      ed<   dZee
ej
                  df      ed<   y)EvollaProteinEncoderModelOutputNsequence_compressor_outputr   .r   r   )r8   r9   r:   r9  rB   FloatTensorr   r   r   r   ro   r   r   r7   r6   r8  r8    si     59 1 1859x 1 129=AM8E%"3"3S"89:A:>Ju00#567>r7   r8  c                   f     e Zd Zdef fdZedej                  dej                  fd       Z	 xZ
S )EvollaProteinEncoderr4   c                 z    t         |           t        |j                        | _        t        |      | _        y )Nr4   )r/   r0   r   r!  modelr  sequence_compressor_resamplerr2   s     r6   r0   zEvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r7   r   r   c                     | j                  ||      }|j                  }| j                  ||      }t        ||j                        S )Nr   )r9  r   )r?  r   r@  r8  )r3   r   r   kwargsprotein_outputprotein_embedssequence_reprs          r6   rl   zEvollaProteinEncoder.forward  sJ    iW'99::>>Z.'4,>>
 	
r7   )r8   r9   r:   r)   r0   r   rB   
LongTensorr:  rl   r;   r<   s   @r6   r<  r<    s?    ^| ^
 
!1!1 
5CTCT 
 
r7   r<  c                   b     e Zd Z	 	 	 ddee   dee   dee   f fdZd Z	 	 	 	 	 	 	 ddZ xZS )	#EvollaSequenceAlignerCrossAttentionprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    t         |           |j                  | _        |j                  | _        | j                  dz  | _        t        | j                  | j                  z        | _        | j                  | j                  z  | _        |j                  }|j                  }|j                  }t        j                  | j                  | j                        | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        t)        | j                        | _        t        j,                  |      | _        t        j                  | j                  | j                  |      | _        t3        | j                  |      | _        t        j6                  t9        j:                  dg            | _        t        j6                  t9        j:                  dg            | _        y )Nr   r   r   ) r/   r0   r{   r|   r   rn   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normr   r   out_projr  r5  r$  rB   tensorgate_attentiongate_ffw)	r3   r4   rI  rJ  rK  r   enable_biasffn_multr5   s	           r6   r0   z,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<d>P>PQD!#+>@R@R!SD#D!%D ,!#+@$BTBT!UD#%99-BDDVDV#WD !%D#'D &99_d6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r7   c	                    |||g}	|	D 
cg c]  }
|
|
	 }	}
|	st        d      t        j                  |	d      }	| j                  |      }| j	                  |      }| j
                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|| j"                  z  }|Mt        j$                  |j                  d      |j                  d            j                  |j&                        }|ddddddf   |	ddddddf   z  }t        j(                  ||j+                  dd	            }||j-                  dd
      j/                         z
  }|j1                  d|z
  j3                         t        j4                  |j6                        j8                        } t;        j<                  d      |      }t        j(                  ||      }|j!                  dddd      j?                         }|j                         dd	 | j@                  fz   } |j                  | }| jC                  |      }|S c c}
w c c}
w c c}
w )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        Nz=At least one modality should be provided for cross attention.r(   r?   r>   r   r   r   rI   Tr   )"r~   rB   rC   rW  r   rP  rQ  ra   rR  rS  rT  rU  r   r|   r   r   r   r   r   r^   r   r   r   r   r   r   r   rS   r   r   Softmax
contiguousr   rX  )r3   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr0  query_layerkey_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msa	key_layervalue_layernew_query_layer_shapenew_key_layer_shapenew_value_layer_shaper   attn_weightsattention_scoresattention_probscontext_layernew_context_layer_shapes                                r6   cross_attentionz3EvollaSequenceAlignerCrossAttention.cross_attention  si   * -.DFVW#/Aa1=AA\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&(;]K	 );1Q]Q;	;IIiQ/	*,A?S"-?Qq??ii3 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR'nn.s3$$$$7
 
 #INN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR!DJJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 -"**,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CDm4q BL < @s"   P5P5P:P:P?P?c           
      ^   |z|j                   \  }}}|jt        j                  ||      j                  |	j                        |	j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |
j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |j                  ||f      j                  z  j                  |j                        }nd }|}||j                         s$||j                         s||j                         rz|}| j                  ||||||||      }t        j                  | j                        |z  }||z   }|}| j                  |      t        j                  | j                        z  }||z   }|S )N)r   )ra  rb  rc  rd  re  rf  rg  rh  )rJ   rB   r   ra   r^   expandTanyr{  tanhrZ  r5  r[  )r3   ra  protein_kv_statesstructure_kv_statesmsa_kv_statesre  rf  rg  rh  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuer  protein_kv_seq_lenr@   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r6   rl   z+EvollaSequenceAlignerCrossAttention.forwardf  sL    (*;*A*A'B"C#+JJr#5699:L:S:ST(//6H"5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I#/4J4N4N4P).>.B.B.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr7   )NNNNNNNNNN)	r8   r9   r:   r   rn   r0   r{  rl   r;   r<   s   @r6   rH  rH    sb     .2/3)-1: &c]1:  (}	1:
 "#1:fnn "#!Gr7   rH  c                       e Zd Zy)rV  Nr   r   r7   r6   rV  rV    r   r7   rV  c                       e Zd Zy)EvollaRotaryEmbeddingNr   r   r7   r6   r  r    r   r7   r  c                       e Zd Zy)	EvollaMLPNr   r   r7   r6   r  r    r   r7   r  c                       e Zd Zy)EvollaAttentionNr   r   r7   r6   r  r    r   r7   r  c                       e Zd Zdedef fdZ	 	 	 	 	 	 	 	 	 	 	 	 ddej                  deej                  ej                  f   de	ej                     de	ej                     de	e   d	e	e   d
e	ej                     de	ej                     de	ej                     de	ej                     de	ej                     de	ej                     de	ej                     de	ej                     fdZ xZS )EvollaDecoderLayerr4   r   c                     t         |   ||       |dz   t        |j                  |j                  z  d      z  dk(  rt        ||j                        | _        y y )Nr(   r   )rI  )r/   r0   maxnum_hidden_layersaligner_num_add_layersrH  r{   adapterr3   r4   r   r5   s      r6   r0   zEvollaDecoderLayer.__init__  s[    +MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr7   r   position_embeddingsr   r1   r  	use_cachecache_positionr  r  r  r  r  r  re  c                    |}| j                  |      } | j                  d|||||||d|\  }}||z   }|}| j                  |      }| j                  |      }||z   }t	        | d      r| j                  |||	|
||||      }|S )N)r   r   r1   r  r  r  r  r  )ra  r  r  r  re  r  r  r  r   )input_layernorm	self_attnpost_attention_layernormmlpr}   r  )r3   r   r  r   r1   r  r  r  r  r  r  r  r  r  re  rB  r  r0  s                     r6   rl   zEvollaDecoderLayer.forward  s    $ !,,]; *4>> 	
')%)) 3	
 	
q !=0 !55mD/ =04# LL*"3$7+ /#5%9- ) 	M r7   )NNNFNNNNNNNN)r8   r9   r:   r)   rn   r0   rB   r   ro   r   rF  r	   r   rl   r;   r<   s   @r6   r  r    sG   |   2637*.$)59486:04597;15265||5 #5<<#=>5 !.	5
 u//05 !5 D>5 !!1!125 $ELL15 &ell35  -5 %U\\25 'u||45 !.5 "%,,/5r7   r  c                       e Zd ZdZg dZd Zy)EvollaPreTrainedModelF)r  r  rH  c                    | j                   j                  }t        j                  |       t	        |t
              rd|j                  j                          |j                  j                          |j                  j                  j                  j                  d       y t	        |t              r(|j                  j                  j                  d|       y y )NrQ   r   r   )r4   r   r%   r   r   rH  rZ  r   r[  rW  r   r   r   r  r   r   r   s      r6   r   z#EvollaPreTrainedModel._init_weights	  s    kk++**62fAB!!'')OO!!#!!((--33C8 ABNN''Sc': Cr7   N)r8   r9   r:   _supports_attention_backendr   r   r   r7   r6   r  r    s    "';r7   r  c            !           e Zd Zdef fdZd Zd Zee	 	 	 	 	 	 	 	 	 	 	 	 	 dde	j                  dee	j                     dee	j                     dee   d	ee	j                     d
ee   dee	j                     dee	j                     dee	j                     dee	j                     dee	j                     dee	j                     dee	j                     deeef   fd              Z xZS )EvollaModelr4   c           	      F   t         |   |       |j                  | _        |j                  | _        t        j                  | j                  |j                  | j                        | _        t        |      | _
        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t!        |j                  |j"                        | _        t'        |      | _        t+        |dd      | _        | j/                          y c c}w )Nr>  )r4   r   )epsgradient_checkpointingF)r/   r0   pad_token_idr   
vocab_sizer   r   r{   embed_tokensr<  protein_encoderr&  r(  r  r  r'  rV  rms_norm_epsr  r  
rotary_embr   r  	post_initr  s      r6   r0   zEvollaModel.__init__  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 	 #!'
 "&"4"4&:M:MN	/v>&-f6NPU&V#s   $Dc                     | j                   S r.   r  r   s    r6   r   z EvollaModel.get_input_embeddings*  s       r7   c                     || _         y r.   r  r   s     r6   r   z EvollaModel.set_input_embeddings-  s
    !r7   r   r   r1   past_key_valuesr   r  r  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  rj   c                    |du |duz  rt        d      || j                  |      }|r|
t               }|F||j                         nd}t	        j
                  |||j                  d   z   |j                        }||j                  d      }d}d}|S|	Q| j                  ||	      }|j                  }t	        j                  dg|j                  d   z  |j                        }t        | j                  ||||      }|}| j                  ||      }| j                  D ]  } ||f||||||||
|||||d	|} | j!                  |      }t#        ||
      }|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr   r(   r]   r   T)r4   input_embedsr   r  r  )r   r1   r  r  r  r  r  r  r  r  r  r  re  )r   r  )r~   r  r
   get_seq_lengthrB   rU   rJ   r^   	unsqueezer  r9  rY  r   r4   r  r'  r  r   )r3   r   r   r1   r  r   r  r  r  r  r  r  r  r  rB  past_seen_tokensprotein_featsr  protein_outputscausal_maskr   r  decoder_layeroutputs                           r6   rl   zEvollaModel.forward0  s   B -t";<YZZ  --i8M0*nO!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L!(-C-O"22+5 3 O ,FFM!&tf7H7N7Nq7Q.QZkZrZr!s(;;&))+
 & #oom\J![[ 	M)*).#-$7"/$3'#5%9- . M	& 		-0(++
 r7   )NNNNNNNNNNNNN)r8   r9   r:   r)   r0   r   r   r   r   rB   rF  r   r   r	   r:  r   r   ro   r   rl   r;   r<   s   @r6   r  r    sw   | *!"  '+1537+/59$(598<9=7;157;15b##b !.b u//0	b
 "%b   1 12b D>b !!1!12b $E$4$45b !) 6b "%"3"34b E--.b 'u||4b !.b  
u--	.!b  br7   r  c                       e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 ddej                  de
ej                     de
ej                     de
ej                     dej                  d	e
ej                     d
e
e   fd              Z xZS )EvollaForProteinText2Textc                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  | j                  d      | _        | j                          y r  )
r/   r0   r  r?  r  r   r   r{   lm_headr  r2   s     r6   r0   z"EvollaForProteinText2Text.__init__  sQ      (
 ++yy!3!3T__5Qr7   c                 6    | j                   j                         S r.   )r?  r   r   s    r6   r   z.EvollaForProteinText2Text.get_input_embeddings  s    zz..00r7   c                 8    | j                   j                  |      S r.   )r?  r   r   s     r6   r   z.EvollaForProteinText2Text.set_input_embeddings  s    zz..u55r7   r   r   r   labelsr  r  r  c           
          | j                   d||||||d|}	|	d   }
| j                  |
      }d}|  | j                  d||| j                  d|}t	        |||	j
                  |	j                  |	j                        }|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r   r   r   r  r  r  r   N)logitsr  r  )lossr  r  r   r   r   )r?  r  loss_functionr  r   r  r   r   )r3   r   r   r   r  r  r  r  rB  outputsr   r  r  
lm_outputss                 r6   rl   z!EvollaForProteinText2Text.forward  s    T $** 
)'/#9
 
  
m,%4%%iVFtibhiD+#33!//))

 r7   r  )r8   r9   r:   r0   r   r   r   r   rB   rF  r   r   r:  r   rl   r;   r<   s   @r6   r  r    s    16  '+1559-1.29=$(?##? !.?   1 12	?
 ))*? !++? !) 6? D>?  ?r7   r  )r  r  r  )Ur   dataclassesr   typingr   r   rB   torch.utils.checkpointr   r   cache_utilsr	   r
   
generationr   masking_utilsr   modeling_outputsr   r   r   r   modeling_utilsr   r   r   utilsr   r   r   utils.genericr   esm.modeling_esmr   r   r   r   r   r   r   r    r!   llama.modeling_llamar"   r#   r$   r%   r&   r'   configuration_evollar)   r*   
get_loggerr8   loggerr,   rG   rM   rz   rO   rq   r   r   r   r   r   r   r   r   r   r   r  r  r8  r<  rH  rV  r  r  r  r  r  r  r  __all__r   r7   r6   <module>r     s      ! "    . ) /  U T 
 0
 
 
  = 
		H	%!] !(
2(
")) (
V# 0")) #D	] 		L 		 		 		 		* 		 	 */ * **_'!< _'D7 		 7 tA		 A'.		 '.T ?k ?  ?
299 
$k")) k\	L 		0 		 		n 	>* >B;0 ;&@' @FP 5 Pf Pr7   