JuncaiL committed on
Commit
3240d88
1 Parent(s): 0b1dfd4

fix state_dict loading in MoE model

Browse files
Files changed (2) hide show
  1. config.json +1 -1
  2. modeling_llama_moe_hf.py +0 -10
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./LLaMA_MoE/",
3
  "add_weight_norm": false,
4
  "architectures": [
5
  "LlamaMoEForCausalLM"
 
1
  {
2
+ "_name_or_path": "JuncaiL/llama-265m",
3
  "add_weight_norm": false,
4
  "architectures": [
5
  "LlamaMoEForCausalLM"
modeling_llama_moe_hf.py CHANGED
@@ -1669,13 +1669,3 @@ class LlamaMoEForCausalLM(LlamaMoEPreTrainedModel):
1669
 
1670
  def reset_experts(self):
1671
  self.model.reset_experts()
1672
-
1673
- @classmethod
1674
- def from_pretrained(cls, *model_args, **kwargs):
1675
- config = kwargs.pop("config", None)
1676
- model = cls(config)
1677
- state_dict = kwargs.pop("moe_state_dict", None)
1678
- if state_dict is not None:
1679
- model.load_state_dict(state_dict)
1680
- return model
1681
-
 
1669
 
1670
  def reset_experts(self):
1671
  self.model.reset_experts()