Mortie1 committed
Commit c14f863
1 Parent(s): 189ed6c

Upload MyLLaMa

Files changed (2)
  1. config.json +4 -0
  2. configure_for_hf.py +8 -2
config.json CHANGED
@@ -2,6 +2,10 @@
   "architectures": [
     "MyLLaMa"
   ],
+  "auto_map": {
+    "AutoConfig": "configure_for_hf.MyLLaMaConfig",
+    "AutoModelForCausalLM": "configure_for_hf.MyLLaMa"
+  },
   "embed_dim": 1536,
   "model_type": "LLaMa",
   "n_chckpnt_segments": 24,
configure_for_hf.py CHANGED
@@ -7,6 +7,7 @@ from transformers import (
     PretrainedConfig,
     PreTrainedModel,
 )
+from collections import namedtuple
 
 from .llama import CustomAttentionLLaMa
 
@@ -70,11 +71,16 @@ class MyLLaMa(PreTrainedModel):
         pad_mask = torch.where(
             tensor == self.model.tokenizer.pad_token_id, False, True
         ).cuda()
+        outs = namedtuple("output", ["logits", "loss"])
         logits = self.model(tensor, att_mask, pad_mask)["logits"]
+
+        outs.logits = logits
+
         if labels is not None:
             loss = nn.functional.cross_entropy(logits, labels)
-            return {"loss": loss, "logits": logits}
-        return {"logits": logits}
+            outs.loss = loss
+
+        return outs
 
 
 AutoConfig.register("LLaMa", MyLLaMaConfig)
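
For context on the new return value: the diff builds the namedtuple class inside forward and assigns logits and loss as class attributes, which gives callers attribute-style access (outputs.logits, outputs.loss). A minimal, self-contained sketch of two other ways to get the same access pattern, using dummy tensors rather than the model above:

# Sketch only, not part of the commit: attribute-style outputs with dummy data.
from collections import namedtuple

import torch
from transformers.modeling_outputs import CausalLMOutput

logits = torch.randn(2, 16, 32000)  # dummy (batch, seq_len, vocab) logits
loss = torch.tensor(1.23)           # dummy scalar loss

# 1) Instantiate the namedtuple; loss defaults to None when labels are absent.
Output = namedtuple("Output", ["logits", "loss"], defaults=[None])
out = Output(logits=logits, loss=loss)

# 2) Or reuse the output class transformers ships for causal LMs.
out = CausalLMOutput(logits=logits, loss=loss)

print(out.loss, out.logits.shape)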