Fix model file path to match repo structure #6
by Pi3141 - opened
training_files/convert-hf-to-pth-16b.py
CHANGED
@@ -1,14 +1,14 @@
-#Convert hf to pth
+# Convert hf to pth
 import os
 import json

 import torch
 from transformers import LlamaTokenizer, LlamaForCausalLM

-tokenizer = LlamaTokenizer.from_pretrained("
+tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")

 base_model = LlamaForCausalLM.from_pretrained(
-    "
+    "../7B-2nd-train",
     load_in_8bit=False,
     torch_dtype=torch.float16,
     device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
 dim = params["dim"]
 dims_per_head = dim // n_heads
 base = 10000.0
-inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
+inv_freq = 1.0 / \
+    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))


 def permute(w):
     return (
-        w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, dim // n_heads // 2, 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


 def unpermute(w):
     return (
-        w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, 2, dim // n_heads // 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


@@ -96,7 +99,7 @@ torch.save(new_state_dict, "consolidated.00.pth")
 with open("params.json", "w") as f:
     json.dump(params, f)

-#Resize tensors
+# Resize tensors
 model = torch.load("consolidated.00.pth", map_location=torch.device('cpu'))
 x = model["tok_embeddings.weight"]
 y = model["output.weight"]
@@ -106,4 +109,4 @@ y = y[:row_exclude]
 model["tok_embeddings.weight"] = x
 model["output.weight"] = y
 torch.save(model, "consolidated.01.pth")
-#Delete consolidated.00.pth and rename consolidated.01.pth into consolidated.00.pth
+# Delete consolidated.00.pth and rename consolidated.01.pth into consolidated.00.pth
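For context (not part of the diff): the permute/unpermute pair touched above only has its long lines re-wrapped, the math is unchanged. A minimal sketch, with made-up n_heads and dim values (the real script reads them from params.json), checking that unpermute inverts permute:

import torch

# Hypothetical toy sizes purely for the round-trip check;
# the conversion script derives n_heads and dim from params.json.
n_heads = 4
dim = 32

def permute(w):
    return (
        w.view(n_heads, dim // n_heads // 2, 2,
               dim).transpose(1, 2).reshape(dim, dim)
    )

def unpermute(w):
    return (
        w.view(n_heads, 2, dim // n_heads // 2,
               dim).transpose(1, 2).reshape(dim, dim)
    )

w = torch.randn(dim, dim)
assert torch.equal(unpermute(permute(w)), w)  # unpermute undoes permute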
training_files/convert-hf-to-pth-32b.py
CHANGED
@@ -1,14 +1,14 @@
-#Convert hf to pth
+# Convert hf to pth
 import os
 import json

 import torch
 from transformers import LlamaTokenizer, LlamaForCausalLM

-tokenizer = LlamaTokenizer.from_pretrained("
+tokenizer = LlamaTokenizer.from_pretrained("../7B-2nd-train")

 base_model = LlamaForCausalLM.from_pretrained(
-    "
+    "../7B-2nd-train",
     load_in_8bit=False,
     torch_dtype=torch.float16,
     device_map={"": "cpu"},
@@ -29,18 +29,21 @@ n_heads = params["n_heads"]
 dim = params["dim"]
 dims_per_head = dim // n_heads
 base = 10000.0
-inv_freq = 1.0 / (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))
+inv_freq = 1.0 / \
+    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))


 def permute(w):
     return (
-        w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, dim // n_heads // 2, 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )


 def unpermute(w):
     return (
-        w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
+        w.view(n_heads, 2, dim // n_heads // 2,
+               dim).transpose(1, 2).reshape(dim, dim)
     )

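Also for context (not part of the diff): the re-wrapped inv_freq line computes the usual rotary-embedding inverse frequencies. A small sketch with an assumed head size (the real script derives dims_per_head from params.json) showing what it evaluates to:

import torch

# Hypothetical head size just to illustrate the expression;
# not a value taken from the repo's params.json.
dims_per_head = 8
base = 10000.0

inv_freq = 1.0 / \
    (base ** (torch.arange(0, dims_per_head, 2).float() / dims_per_head))

print(inv_freq)  # tensor([1.0000, 0.1000, 0.0100, 0.0010])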