Upload MyLLaMa
- config.json +4 -0
- configure_for_hf.py +8 -2
config.json
CHANGED
@@ -2,6 +2,10 @@
   "architectures": [
     "MyLLaMa"
   ],
+  "auto_map": {
+    "AutoConfig": "configure_for_hf.MyLLaMaConfig",
+    "AutoModelForCausalLM": "configure_for_hf.MyLLaMa"
+  },
   "embed_dim": 1536,
   "model_type": "LLaMa",
   "n_chckpnt_segments": 24,
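The new auto_map block points the Auto classes at the custom code shipped in configure_for_hf.py, so the checkpoint can be loaded through the standard Auto API. A minimal loading sketch, assuming the files sit in a Hub repo (the repo id below is a placeholder, not part of this commit):

    from transformers import AutoConfig, AutoModelForCausalLM

    # trust_remote_code=True is required so that configure_for_hf.MyLLaMaConfig and
    # configure_for_hf.MyLLaMa are imported from the repo rather than from transformers.
    config = AutoConfig.from_pretrained("username/MyLLaMa", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("username/MyLLaMa", trust_remote_code=True)

The same auto_map entries could also have been written automatically by calling register_for_auto_class() on the config and model classes before push_to_hub; editing config.json by hand, as done here, is the manual equivalent.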
configure_for_hf.py
CHANGED
@@ -7,6 +7,7 @@ from transformers import (
     PretrainedConfig,
     PreTrainedModel,
 )
+from collections import namedtuple
 
 from .llama import CustomAttentionLLaMa
 
@@ -70,11 +71,16 @@ class MyLLaMa(PreTrainedModel):
         pad_mask = torch.where(
             tensor == self.model.tokenizer.pad_token_id, False, True
         ).cuda()
+        outs = namedtuple("output", ["logits", "loss"])
         logits = self.model(tensor, att_mask, pad_mask)["logits"]
+
+        outs.logits = logits
+
         if labels is not None:
             loss = nn.functional.cross_entropy(logits, labels)
-
-
+            outs.loss = loss
+
+        return outs
 
 
 AutoConfig.register("LLaMa", MyLLaMaConfig)
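With this change, forward() returns an object exposing .logits and .loss, which is the shape of output that Trainer and the generation utilities look for. Note that the commit assigns attributes on the namedtuple class itself rather than instantiating it, so when labels is None, outs.loss stays bound to the namedtuple field descriptor (or to a stale value from an earlier call) instead of None, which a downstream check such as "if outputs.loss is not None" would misread. A minimal alternative sketch using the stock output dataclass (an assumption about intent, not what this commit ships; _build_output is a hypothetical helper):

    from torch import nn
    from transformers.modeling_outputs import CausalLMOutput

    def _build_output(logits, labels=None):
        # Hypothetical helper: package the forward() results in a proper output
        # object instead of mutating the namedtuple class.
        loss = None
        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels)
        return CausalLMOutput(loss=loss, logits=logits)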