from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import logging
from ..deepseek_v3.modeling_deepseek_v3 import (
    DeepseekV3DecoderLayer,
    DeepseekV3MLP,
    DeepseekV3MoE,
    DeepseekV3PreTrainedModel,
    DeepseekV3TopkRouter,
)
from ..qwen3.modeling_qwen3 import (
    Qwen3Attention,
    Qwen3ForCausalLM,
    Qwen3Model,
    Qwen3RMSNorm,
    Qwen3RotaryEmbedding,
    TransformersKwargs,
)
from .configuration_dots1 import Dots1Config


logger = logging.get_logger(__name__)


class Dots1RMSNorm(Qwen3RMSNorm):
    pass


class Dots1RotaryEmbedding(Qwen3RotaryEmbedding):
    pass


class Dots1Attention(Qwen3Attention):
    pass


class Dots1MLP(DeepseekV3MLP):
    pass


class Dots1MoE(DeepseekV3MoE):
    pass


class Dots1TopkRouter(DeepseekV3TopkRouter):
    pass


class Dots1DecoderLayer(DeepseekV3DecoderLayer):
    def __init__(self, config: Dots1Config, layer_idx: int):
        super().__init__()
        self.attention_type = config.layer_types[layer_idx]


class Dots1PreTrainedModel(DeepseekV3PreTrainedModel):
    pass


class Dots1Model(Qwen3Model):
    pass


class Dots1ForCausalLM(Qwen3ForCausalLM):
    def forward(self, **super_kwargs: Unpack[TransformersKwargs]) -> CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Dots1ForCausalLM

        >>> model = Dots1ForCausalLM.from_pretrained("rednote-hilab/dots1.llm1.inst")
        >>> tokenizer = AutoTokenizer.from_pretrained("rednote-hilab/dots1.llm1.inst")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


__all__ = ["Dots1PreTrainedModel", "Dots1Model", "Dots1ForCausalLM"]