Mortie1 committed
Commit c14f863
1 Parent(s): 189ed6c

Upload MyLLaMa

Files changed (2)
  1. config.json +4 -0
  2. configure_for_hf.py +8 -2
config.json CHANGED
@@ -2,6 +2,10 @@
   "architectures": [
     "MyLLaMa"
   ],
+  "auto_map": {
+    "AutoConfig": "configure_for_hf.MyLLaMaConfig",
+    "AutoModelForCausalLM": "configure_for_hf.MyLLaMa"
+  },
   "embed_dim": 1536,
   "model_type": "LLaMa",
   "n_chckpnt_segments": 24,
configure_for_hf.py CHANGED
@@ -7,6 +7,7 @@ from transformers import (
     PretrainedConfig,
     PreTrainedModel,
 )
+from collections import namedtuple
 
 from .llama import CustomAttentionLLaMa
 
@@ -70,11 +71,16 @@ class MyLLaMa(PreTrainedModel):
         pad_mask = torch.where(
             tensor == self.model.tokenizer.pad_token_id, False, True
         ).cuda()
+        outs = namedtuple("output", ["logits", "loss"])
         logits = self.model(tensor, att_mask, pad_mask)["logits"]
+
+        outs.logits = logits
+
         if labels is not None:
             loss = nn.functional.cross_entropy(logits, labels)
-            return {"loss": loss, "logits": logits}
-        return {"logits": logits}
+            outs.loss = loss
+
+        return outs
 
 
 AutoConfig.register("LLaMa", MyLLaMaConfig)
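
For context on the new return value: the diff builds the namedtuple class inside forward and assigns logits and loss as class attributes, which gives callers attribute-style access (outputs.logits, outputs.loss). A minimal, self-contained sketch of two other ways to get the same access pattern, using dummy tensors rather than the model above:

# Sketch only, not part of the commit: attribute-style outputs with dummy data.
from collections import namedtuple

import torch
from transformers.modeling_outputs import CausalLMOutput

logits = torch.randn(2, 16, 32000)  # dummy (batch, seq_len, vocab) logits
loss = torch.tensor(1.23)           # dummy scalar loss

# 1) Instantiate the namedtuple; loss defaults to None when labels are absent.
Output = namedtuple("Output", ["logits", "loss"], defaults=[None])
out = Output(logits=logits, loss=loss)

# 2) Or reuse the output class transformers ships for causal LMs.
out = CausalLMOutput(logits=logits, loss=loss)

print(out.loss, out.logits.shape)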