smangrul committed
Commit 4c13f33
1 Parent(s): 26c92d0

Create make_tiny_model.py

Files changed (1)
  1. make_tiny_model.py +58 -0
make_tiny_model.py ADDED
@@ -0,0 +1,58 @@
+ # Adapted from https://huggingface.co/stas/tiny-random-llama-2/blob/main/make_tiny_model.py
+
+ import subprocess
+ import shlex
+ import torch
+ from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+
+
+ mname_from = "Qwen/Qwen1.5-MoE-A2.7B"
+ mname_tiny = "peft-internal-testing/tiny-random-qwen-1.5-MoE"
+ vocab_keep_items = 3000
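+ # vocab_keep_items drives both the model's vocab_size below and the
+ # tokenizer truncation further down, keeping the two in sync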
+
+ config = AutoConfig.from_pretrained(mname_from)
+ # print("orig config", config)
+ config.update(dict(
+     hidden_size=16,
+     intermediate_size=64,
+     num_attention_heads=4,
+     num_hidden_layers=2,
+     max_position_embeddings=256,
+     num_key_value_heads=4,
+     vocab_size=vocab_keep_items,
+     num_experts=4,
+     num_experts_per_tok=2,
+ ))
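+ # fields not overridden above (e.g. moe_intermediate_size,
+ # shared_expert_intermediate_size) keep their full-size values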
+ print("new config", config)
+
+ # create a tiny random model
+ tiny_model = AutoModelForCausalLM.from_config(config)
+ print(f"num of params {tiny_model.num_parameters()}")
+
+ # shrink it further and save
+ tiny_model.bfloat16()  # bf16 halves the size vs fp32
+ tiny_model.save_pretrained(mname_tiny)
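+ # this writes config.json plus the randomly initialized weights into a
+ # local directory named after mname_tiny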
+
+ # shrink the tokenizer from the full ~151k vocab to 3k
+ tokenizer_fast = AutoTokenizer.from_pretrained(mname_from)
+ tmp_dir = f"/tmp/{mname_from}"
+ tokenizer_fast.save_pretrained(tmp_dir)
+ # resize tokenizer.json (vocab.txt will be automatically resized on save_pretrained)
+ # perl -0777 -pi -e 's|(2999).*|$1},"merges": []}}|msg' tokenizer.json # 0-indexed, so vocab_keep_items-1!
+ closing_pat = '},"merges": []}}'
+ cmd = f"perl -0777 -pi -e 's|({vocab_keep_items-1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json"
+ # print(f"Running:\n{cmd}")
+ result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
+ # print(result)
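+ # the same truncation could be done without perl; a rough pure-Python sketch,
+ # assuming the usual fast-BPE layout {"model": {"vocab": {...}, "merges": [...]}}:
+ #   import json
+ #   with open(f"{tmp_dir}/tokenizer.json") as f:
+ #       tok = json.load(f)
+ #   tok["model"]["vocab"] = {t: i for t, i in tok["model"]["vocab"].items() if i < vocab_keep_items}
+ #   tok["model"]["merges"] = []
+ #   with open(f"{tmp_dir}/tokenizer.json", "w") as f:
+ #       json.dump(tok, f, ensure_ascii=False)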
+
+ # reload with the modified tokenizer
+ tokenizer_fast_tiny = AutoTokenizer.from_pretrained(tmp_dir)
+ tokenizer_fast_tiny.save_pretrained(mname_tiny)
+
+ # test that the new model and tokenizer work together
+ model_inputs = tokenizer_fast_tiny("Making tiny model", return_tensors="pt")
+ gen_tokens = tiny_model.generate(**model_inputs, max_new_tokens=100)
+ print(tokenizer_fast_tiny.batch_decode(gen_tokens, skip_special_tokens=True))
+ print("Random output is expected, but there should be no crash")
+
+ print(f"Model+Tokenizer saved in {mname_tiny}")