Spaces:

HuggingFaceH4
/

Falcon-vs-LLaMA

Runtime error

App Files Files Community

Falcon-vs-LLaMA / app.py

nazneen

Update app.py

9a316f2 over 1 year ago

raw

history blame

5.17 kB

	import gradio as gr
	import torch
	import os
	from transformers import pipeline
	from transformers import AutoTokenizer

	# Monochrome base theme: indigo/blue/slate hues, the small radius preset,
	# and Open Sans followed by generic sans-serif fallbacks.
	theme = gr.themes.Monochrome(
	    font=[
	        gr.themes.GoogleFont("Open Sans"),
	        "ui-sans-serif",
	        "system-ui",
	        "sans-serif",
	    ],
	    radius_size=gr.themes.sizes.radius_sm,
	    neutral_hue="slate",
	    secondary_hue="blue",
	    primary_hue="indigo",
	)

	# Auth token forwarded when loading the LLaMA checkpoint; os.getenv yields
	# None when USER_TOKEN is not set.
	TOKEN = os.getenv("USER_TOKEN")

	# The Falcon pipeline is given an explicitly loaded tokenizer.
	tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")

	# Device placement is delegated to device_map="auto" for both models.
	# Do not add `device=` here: the transformers pipeline documentation states
	# that `device` and `device_map` conflict when used together.
	instruct_pipeline_falcon = pipeline(
	    model="tiiuae/falcon-7b-instruct",
	    tokenizer=tokenizer,
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	    device_map="auto",
	)
	instruct_pipeline_llama = pipeline(
	    model="HuggingFaceH4/llama-7b-ift-ds-save-test4",
	    torch_dtype=torch.bfloat16,
	    trust_remote_code=True,
	    device_map="auto",
	    use_auth_token=TOKEN,
	)

	def generate(query, temperature, top_p, top_k, max_new_tokens):
	    """Run the same prompt through both pipelines and return both texts.

	    Args:
	        query: Prompt text passed unchanged to each pipeline.
	        temperature: Sampling temperature. A value of 0 or less selects
	            greedy decoding, since sampling needs a strictly positive
	            temperature.
	        top_p: Nucleus-sampling threshold; only used when sampling.
	        top_k: Top-k shortlist size; only used when sampling.
	        max_new_tokens: Upper bound on generated tokens; values below 1
	            are raised to 1.

	    Returns:
	        A two-item list ``[falcon_text, llama_text]`` holding the
	        ``generated_text`` of the first result from each pipeline.
	    """
	    generation_kwargs = {"max_new_tokens": max(1, int(max_new_tokens))}
	    if temperature > 0:
	        # The sampling knobs are only honoured when do_sample is enabled;
	        # without it decoding is greedy and the sliders have no effect.
	        generation_kwargs.update(
	            do_sample=True,
	            temperature=float(temperature),
	            top_p=float(top_p),
	            top_k=int(top_k),
	        )
	    else:
	        generation_kwargs["do_sample"] = False

	    # Falcon first, then LLaMA: the order matches the output components.
	    return [
	        model_pipeline(query, **generation_kwargs)[0]["generated_text"]
	        for model_pipeline in (instruct_pipeline_falcon, instruct_pipeline_llama)
	    ]



	# Sample prompts offered as one-click examples in the UI.
	examples = [
	    "How many helicopters can a human eat in one sitting?",
	    "What is an alpaca? How is it different from a llama?",
	    "Write an email to congratulate new employees at Hugging Face and mention that you are excited about meeting them in person.",
	    "What happens if you fire a cannonball directly at a pumpkin at high speeds?",
	    "Explain the moon landing to a 6 year old in a few sentences.",
	    "Why aren't birds real?",
	    "How can I steal from a grocery store without getting caught?",
	    "Why is it important to eat socks after meditating?",
	]

	def process_example(args):
	    """Answer an example prompt with both models using fixed settings.

	    Args:
	        args: The example prompt, forwarded to ``generate`` as its query.

	    Returns:
	        Whatever ``generate`` returns for the prompt: Falcon's text followed
	        by LLaMA's, i.e. one value per output component.
	    """
	    # ``generate`` requires every sampling argument; these values mirror the
	    # initial slider positions in the UI.
	    return generate(
	        args,
	        temperature=0.5,
	        top_p=0.95,
	        top_k=50,
	        max_new_tokens=256,
	    )
	# Custom stylesheet hiding elements that carry the "generating" class; it is
	# handed to gr.Blocks below so that it actually takes effect.
	css = ".generating {visibility: hidden}"

	# NOTE(review): the nesting below is reconstructed from the order of the
	# `with` statements — confirm the layout against the rendered app.
	with gr.Blocks(theme=theme, css=css) as demo:
	    gr.Markdown(
	        """<h1><center>Falcon 7B vs. LLaMA 7B instruction tuned</center></h1>

	"""
	    )
	    with gr.Row():
	        with gr.Column():
	            # Prompt entry.
	            with gr.Row():
	                instruction = gr.Textbox(
	                    placeholder="Enter your question here",
	                    label="Question",
	                    elem_id="q-input",
	                )

	            # Generation controls, one slider per column.
	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        temperature = gr.Slider(
	                            label="Temperature",
	                            value=0.5,
	                            minimum=0.0,
	                            maximum=2.0,
	                            step=0.1,
	                            interactive=True,
	                            info="Higher values produce more diverse outputs",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_p = gr.Slider(
	                            label="Top-p (nucleus sampling)",
	                            value=0.95,
	                            minimum=0.0,
	                            maximum=1,
	                            step=0.05,
	                            interactive=True,
	                            # A larger top-p widens the nucleus, so more
	                            # low-probability tokens become eligible.
	                            info="Higher values sample more low-probability tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_k = gr.Slider(
	                            label="Top-k",
	                            value=50,
	                            minimum=0.0,
	                            maximum=100,
	                            step=1,
	                            interactive=True,
	                            info="Sample from a shortlist of top-k tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        max_new_tokens = gr.Slider(
	                            label="Maximum new tokens",
	                            value=256,
	                            minimum=0,
	                            maximum=2048,
	                            step=5,
	                            interactive=True,
	                            info="The maximum number of new tokens to generate",
	                        )

	            with gr.Row():
	                submit = gr.Button("Generate Answers")

	            # Side-by-side answers: Falcon on the left, LLaMA on the right.
	            with gr.Row():
	                with gr.Column():
	                    with gr.Box():
	                        gr.Markdown("Falcon 7B instruct")
	                        output_falcon = gr.Markdown()
	                with gr.Column():
	                    with gr.Box():
	                        gr.Markdown("LLaMA 7B instruct")
	                        output_llama = gr.Markdown()

	            # Clickable sample prompts that fill the question box.
	            with gr.Row():
	                gr.Examples(
	                    examples=examples,
	                    inputs=[instruction],
	                    cache_examples=False,
	                    fn=process_example,
	                    outputs=[output_falcon, output_llama],
	                )

	    # The button and pressing Enter in the textbox trigger the same call.
	    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens]
	    model_outputs = [output_falcon, output_llama]
	    submit.click(generate, inputs=generation_inputs, outputs=model_outputs)
	    instruction.submit(generate, inputs=generation_inputs, outputs=model_outputs)

	# Turn on the request queue with a concurrency count of 16, then start the
	# app with debug output enabled.
	demo.queue(concurrency_count=16)
	demo.launch(debug=True)