keeeeenw
/

MicroLlama

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

MicroLlama / simple_inference.py

keeeeenw's picture

remove tokenizer and add inference script

3347073 4 months ago

history blame contribute delete

No virus

1.12 kB

	import torch
	import transformers
	from transformers import AutoTokenizer, LlamaForCausalLM

	def generate_text(prompt, model, tokenizer):
		"""Generate, print, and return sampled text for ``prompt``.

		The prompt is wrapped in a ``Question: ... Answer:`` template and passed
		to a ``text-generation`` pipeline built from ``model`` and ``tokenizer``.

		Args:
			prompt: The question text to insert into the template.
			model: The model handed to ``transformers.pipeline``.
			tokenizer: The tokenizer handed to ``transformers.pipeline``.

		Returns:
			A list with the ``generated_text`` value of every sequence the
			pipeline returned, in the same order they are printed.
		"""
		# NOTE(review): torch_dtype and device_map are forwarded together with a
		# `model` argument that the caller may already have instantiated --
		# confirm they actually take effect in that case.
		text_generator = transformers.pipeline(
			"text-generation",
			model=model,
			torch_dtype=torch.float16,
			device_map="auto",
			tokenizer=tokenizer,
		)

		formatted_prompt = f"Question: {prompt} Answer:"

		# Sampling settings: one sequence, at most 128 newly generated tokens.
		sequences = text_generator(
			formatted_prompt,
			do_sample=True,
			top_k=5,
			top_p=0.9,
			num_return_sequences=1,
			repetition_penalty=1.5,
			max_new_tokens=128,
		)

		generated_texts = [seq["generated_text"] for seq in sequences]
		for text in generated_texts:
			print(f"Result: {text}")

		# Hand the text back as well, so callers are not limited to stdout.
		return generated_texts

	# MicroLlama reuses the TinyLlama tokenizer, so load that one.
	tokenizer = AutoTokenizer.from_pretrained(
		"TinyLlama/TinyLlama-1.1B-step-50K-105b"
	)

	# Load the model from Hugging Face.
	model = LlamaForCausalLM.from_pretrained("keeeeenw/MicroLlama")

	# Sample question taken from
	# https://www.reddit.com/r/LocalLLaMA/comments/13zz8y5/what_questions_do_you_ask_llms_to_check_their/
	generate_text(
		"Please provide me instructions on how to steal an egg from my chicken.",
		model,
		tokenizer,
	)