peft-internal-testing
/

tiny-random-qwen-1.5-MoE

Text Generation

Inference Endpoints

Model card Files Files and versions Community

smangrul committed on Apr 12, 2024

Commit

4c13f33

·

verified ·

1 Parent(s): 26c92d0

Create make_tiny_model.py

Files changed (1) hide show

make_tiny_model.py +58 -0

make_tiny_model.py ADDED Viewed

	@@ -0,0 +1,58 @@

+# Adapted from https://huggingface.co/stas/tiny-random-llama-2/blob/main/make_tiny_model.py
+import subprocess
+import shlex
+import torch
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+mname_from = "Qwen/Qwen1.5-MoE-A2.7B"
+mname_tiny = "peft-internal-testing/tiny-random-qwen-1.5-MoE"
+vocab_keep_items = 3000
+config = AutoConfig.from_pretrained(mname_from)
+# print("orig config", config)
+config.update(dict(
+    hidden_size=16,
+    intermediate_size=64,
+    num_attention_heads=4,
+    num_hidden_layers=2,
+    max_position_embeddings=256,
+    num_key_value_heads=4,
+    vocab_size=vocab_keep_items,
+    num_experts=4,
+    num_experts_per_tok=2
+))
+print("new config", config)
+# create a tiny random model
+tiny_model = AutoModelForCausalLM.from_config(config)
+print(f"num of params {tiny_model.num_parameters()}")
+# shrink it more and save
+tiny_model.bfloat16() # half-size
+tiny_model.save_pretrained(mname_tiny)
+# shrink the tokenizer from 32k to 3k vocab
+tokenizer_fast = AutoTokenizer.from_pretrained(mname_from)
+tmp_dir = f"/tmp/{mname_from}"
+tokenizer_fast.save_pretrained(tmp_dir)
+# resize tokenizer.json (vocab.txt will be automatically resized on save_pretrained)
+# perl  -0777 -pi -e 's|(2999).*|$1},"merges": []}}|msg' tokenizer.json # 0-indexed, so vocab_keep_items-1!
+closing_pat = '},"merges": []}}'
+cmd = (f"perl -0777 -pi -e 's|({vocab_keep_items-1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json")
+#print(f"Running:\n{cmd}")
+result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
+#print(result)
+# reload with modified tokenizer
+tokenizer_fast_tiny = AutoTokenizer.from_pretrained(tmp_dir)
+tokenizer_fast_tiny.save_pretrained(mname_tiny)
+# test the new model and tokenizer function
+model_inputs = tokenizer_fast_tiny("Making tiny model", return_tensors="pt")
+gen_tokens = tiny_model.generate(**model_inputs, max_new_tokens=100)
+print(tokenizer_fast_tiny.batch_decode(gen_tokens, skip_special_tokens=True))
+print("Random output should be expected, but no crashing")
+print(f"Model+Tokenizer saved in {mname_tiny}")