#!/usr/bin/env python
# This script creates a super tiny model that is useful inside tests, when we just want to test that
# the machinery works, without needing to check the quality of the outcomes.
#
# usage: adjust the configs if wanted, but otherwise just run the script

from pathlib import Path

from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizer

mname_tiny = "tiny-random-LlamaForCausalLM"

path = Path(mname_tiny)
path.mkdir(parents=True, exist_ok=True)

# Shrink every dimension of the default config so the randomly initialized model stays tiny
config = LlamaConfig()
config.update(
    dict(
        vocab_size=32000,
        hidden_size=16,
        intermediate_size=16 * 4,
        num_hidden_layers=2,
        num_attention_heads=4,
        num_key_value_heads=4,  # must divide num_attention_heads, else model init fails
    )
)

model = LlamaForCausalLM(config)
# Reuse the tokenizer files from an existing Llama checkpoint; replace the
# placeholder below with the actual local path to one
tokenizer = LlamaTokenizer.from_pretrained("path_to_llama_7b")

# Test w/ one text - a quick sanity check that tokenization + generation run end to end
query = "This is a test"
query_tokens = tokenizer(query, return_tensors="pt")

inputs = {
    "input_ids": query_tokens["input_ids"],
    "attention_mask": query_tokens["attention_mask"],
}

out_gen = model.generate(**inputs)
text = tokenizer.batch_decode(out_gen)

# Save model + config + tokenizer
model.half()  # makes the checkpoint smaller
model.save_pretrained(path)
tokenizer.save_pretrained(path)

# test we can load it back
model = LlamaForCausalLM.from_pretrained(path)

print(f"Generated {mname_tiny} - Upload the generated folder to the hub")
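
# A minimal sketch of how a test could consume the tiny checkpoint once uploaded;
# the hub namespace below is a hypothetical placeholder, and loading from the
# local `path` works the same way:
#
#   from transformers import LlamaForCausalLM, LlamaTokenizer
#
#   model = LlamaForCausalLM.from_pretrained("your-namespace/tiny-random-LlamaForCausalLM")
#   tokenizer = LlamaTokenizer.from_pretrained("your-namespace/tiny-random-LlamaForCausalLM")
#   out = model.generate(**tokenizer("This is a test", return_tensors="pt"), max_new_tokens=4)
#   print(tokenizer.batch_decode(out))  # gibberish is expected - only the machinery is under test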