Create make_tiny_model.py
Browse files- make_tiny_model.py +58 -0
make_tiny_model.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Adapted from https://huggingface.co/stas/tiny-random-llama-2/blob/main/make_tiny_model.py
|
2 |
+
|
3 |
+
import subprocess
|
4 |
+
import shlex
|
5 |
+
import torch
|
6 |
+
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
|
7 |
+
|
8 |
+
|
9 |
+
mname_from = "Qwen/Qwen1.5-MoE-A2.7B"
|
10 |
+
mname_tiny = "peft-internal-testing/tiny-random-qwen-1.5-MoE"
|
11 |
+
vocab_keep_items = 3000
|
12 |
+
|
13 |
+
config = AutoConfig.from_pretrained(mname_from)
|
14 |
+
# print("orig config", config)
|
15 |
+
config.update(dict(
|
16 |
+
hidden_size=16,
|
17 |
+
intermediate_size=64,
|
18 |
+
num_attention_heads=4,
|
19 |
+
num_hidden_layers=2,
|
20 |
+
max_position_embeddings=256,
|
21 |
+
num_key_value_heads=4,
|
22 |
+
vocab_size=vocab_keep_items,
|
23 |
+
num_experts=4,
|
24 |
+
num_experts_per_tok=2
|
25 |
+
))
|
26 |
+
print("new config", config)
|
27 |
+
|
28 |
+
# create a tiny random model
|
29 |
+
tiny_model = AutoModelForCausalLM.from_config(config)
|
30 |
+
print(f"num of params {tiny_model.num_parameters()}")
|
31 |
+
|
32 |
+
# shrink it more and save
|
33 |
+
tiny_model.bfloat16() # half-size
|
34 |
+
tiny_model.save_pretrained(mname_tiny)
|
35 |
+
|
36 |
+
# shrink the tokenizer vocab down to vocab_keep_items (3k) entries
|
37 |
+
tokenizer_fast = AutoTokenizer.from_pretrained(mname_from)
|
38 |
+
tmp_dir = f"/tmp/{mname_from}"
|
39 |
+
tokenizer_fast.save_pretrained(tmp_dir)
|
40 |
+
# resize tokenizer.json (the separate vocab files will be automatically rewritten on save_pretrained)
|
41 |
+
# perl -0777 -pi -e 's|(2999).*|$1},"merges": []}}|msg' tokenizer.json # 0-indexed, so vocab_keep_items-1!
|
42 |
+
closing_pat = '},"merges": []}}'
|
43 |
+
cmd = (f"perl -0777 -pi -e 's|({vocab_keep_items-1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json")
|
44 |
+
#print(f"Running:\n{cmd}")
|
45 |
+
result = subprocess.run(shlex.split(cmd), capture_output=True, text=True, check=True)  # check=True: raise CalledProcessError on a non-zero perl exit instead of silently continuing
|
46 |
+
#print(result)
|
47 |
+
|
48 |
+
# reload with modified tokenizer
|
49 |
+
tokenizer_fast_tiny = AutoTokenizer.from_pretrained(tmp_dir)
|
50 |
+
tokenizer_fast_tiny.save_pretrained(mname_tiny)
|
51 |
+
|
52 |
+
# test the new model and tokenizer function
|
53 |
+
model_inputs = tokenizer_fast_tiny("Making tiny model", return_tensors="pt")
|
54 |
+
gen_tokens = tiny_model.generate(**model_inputs, max_new_tokens=100)
|
55 |
+
print(tokenizer_fast_tiny.batch_decode(gen_tokens, skip_special_tokens=True))
|
56 |
+
print("Random output should be expected, but no crashing")
|
57 |
+
|
58 |
+
print(f"Model+Tokenizer saved in {mname_tiny}")
|