| from transformers import PreTrainedModel | |
| from transformers.models.qwen3.modeling_qwen3 import Qwen3Model, Qwen3Config | |
| import torch.nn as nn | |
class Dolphy1ForCausalLM(PreTrainedModel):
    """Thin causal-LM wrapper around a Qwen3 backbone.

    All computation is delegated to ``Qwen3Model``. Any extra submodules
    saved with the checkpoint (e.g. a router) are restored automatically
    by ``from_pretrained`` — nothing needs to be reattached here.

    NOTE(review): despite the ``ForCausalLM`` name, this class defines no
    ``lm_head`` and ``forward`` returns the backbone's output (hidden
    states), not vocabulary logits — confirm that callers expect that.
    """

    # Tells PreTrainedModel.from_pretrained which config class to build.
    config_class = Qwen3Config

    def __init__(self, config):
        """Instantiate the Qwen3 backbone; no additional heads are added."""
        super().__init__(config)
        self.model = Qwen3Model(config)

    def forward(self, input_ids, attention_mask=None, **kwargs):
        """Run the backbone and return its output unchanged.

        Any extra keyword arguments are forwarded verbatim to the
        underlying ``Qwen3Model``.
        """
        backbone = self.model
        return backbone(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )