Fix model file path to match repo structure #6
by Pi3141 - opened
training_files/convert-hf-to-pth-16b.py
CHANGED
@@ -1,14 +1,14 @@
-#Convert hf to pth
+# Convert hf to pth
 import os
 import json

 import torch
 from transformers import LlamaTokenizer, LlamaForCausalLM

-tokenizer = LlamaTokenizer.from_pretrained("
+tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")

 base_model = LlamaForCausalLM.from_pretrained(
-    "
+    "../7B-2nd-train",
     load_in_8bit=False,
     torch_dtype=torch.float16,
     device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
 dim = params["dim"]
 dims_per_head = dim // n_heads
 base = 10000.0
-inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
+inv_freq = 1.0 / \
+    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))


 def permute(w):
     return (
-        w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, dim // n_heads // 2, 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


 def unpermute(w):
     return (
-        w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, 2, dim // n_heads // 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


@@ -96,7 +99,7 @@ torch.save(new_state_dict, "consolidated.00.pth")
 with open("params.json", "w") as f:
     json.dump(params, f)

-#Resize tensors
+# Resize tensors
 model = torch.load("consolidated.00.pth", map_location=torch.device('cpu'))
 x = model["tok_embeddings.weight"]
 y = model["output.weight"]
@@ -106,4 +109,4 @@ y = y[:row_exclude]
 model["tok_embeddings.weight"] = x
 model["output.weight"] = y
 torch.save(model, "consolidated.01.pth")
-#Delete consolidated.00.pth and rename consolidated.01.pth into consolidated.00.pth
+# Delete consolidated.00.pth and rename consolidated.01.pth into consolidated.00.pth
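For context (not part of the diff): the permute/unpermute pair touched above only has its long lines re-wrapped, the math is unchanged. A minimal sketch, with made-up n_heads and dim values (the real script reads them from params.json), checking that unpermute inverts permute:

import torch

# Hypothetical toy sizes purely for the round-trip check;
# the conversion script derives n_heads and dim from params.json.
n_heads = 4
dim = 32

def permute(w):
    return (
        w.view(n_heads, dim // n_heads // 2, 2,
               dim).transpose(1, 2).reshape(dim, dim)
    )

def unpermute(w):
    return (
        w.view(n_heads, 2, dim // n_heads // 2,
               dim).transpose(1, 2).reshape(dim, dim)
    )

w = torch.randn(dim, dim)
assert torch.equal(unpermute(permute(w)), w)  # unpermute undoes permute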
training_files/convert-hf-to-pth-32b.py
CHANGED
@@ -1,14 +1,14 @@
-#Convert hf to pth
+# Convert hf to pth
 import os
 import json

 import torch
 from transformers import LlamaTokenizer, LlamaForCausalLM

-tokenizer = LlamaTokenizer.from_pretrained("
+tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")

 base_model = LlamaForCausalLM.from_pretrained(
-    "
+    "../7B-2nd-train",
     load_in_8bit=False,
     torch_dtype=torch.float16,
     device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
 dim = params["dim"]
 dims_per_head = dim // n_heads
 base = 10000.0
-inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
+inv_freq = 1.0 / \
+    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))


 def permute(w):
     return (
-        w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, dim // n_heads // 2, 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


 def unpermute(w):
     return (
-        w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, 2, dim // n_heads // 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )

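Also for context (not part of the diff): the re-wrapped inv_freq line computes the usual rotary-embedding inverse frequencies. A small sketch with an assumed head size (the real script derives dims_per_head from params.json) showing what it evaluates to:

import torch

# Hypothetical head size just to illustrate the expression;
# not a value taken from the repo's params.json.
dims_per_head = 8
base = 10000.0

inv_freq = 1.0 / \
    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))

print(inv_freq)  # tensor([1.0000, 0.1000, 0.0100, 0.0010])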