Falcon-vs-LLaMA / app.py
nazneen's picture
Update app.py
9a316f2
raw
history blame
5.17 kB
import gradio as gr
import torch
import os
from transformers import pipeline
from transformers import AutoTokenizer
theme = gr.themes.Monochrome(
primary_hue="indigo",
secondary_hue="blue",
neutral_hue="slate",
radius_size=gr.themes.sizes.radius_sm,
font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)
TOKEN = os.getenv("USER_TOKEN")
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", tokenizer = tokenizer, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", device=0)
instruct_pipeline_llama = pipeline(model="HuggingFaceH4/llama-7b-ift-ds-save-test4", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", use_auth_token=TOKEN)
def generate(query, temperature, top_p, top_k, max_new_tokens):
return [instruct_pipeline_falcon(query, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)[0]["generated_text"],
instruct_pipeline_llama(query, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)[0]["generated_text"]]
examples = [
"How many helicopters can a human eat in one sitting?",
"What is an alpaca? How is it different from a llama?",
"Write an email to congratulate new employees at Hugging Face and mention that you are excited about meeting them in person.",
"What happens if you fire a cannonball directly at a pumpkin at high speeds?",
"Explain the moon landing to a 6 year old in a few sentences.",
"Why aren't birds real?",
"How can I steal from a grocery store without getting caught?",
"Why is it important to eat socks after meditating?",
]
def process_example(args):
for x in generate(args):
pass
return x
css = ".generating {visibility: hidden}"
with gr.Blocks(theme=theme) as demo:
gr.Markdown(
"""<h1><center>Falcon 7B vs. LLaMA 7B instruction tuned</center></h1>
"""
)
with gr.Row():
with gr.Column():
with gr.Row():
instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
with gr.Row():
with gr.Column():
with gr.Row():
temperature = gr.Slider(
label="Temperature",
value=0.5,
minimum=0.0,
maximum=2.0,
step=0.1,
interactive=True,
info="Higher values produce more diverse outputs",
)
with gr.Column():
with gr.Row():
top_p = gr.Slider(
label="Top-p (nucleus sampling)",
value=0.95,
minimum=0.0,
maximum=1,
step=0.05,
interactive=True,
info="Higher values sample fewer low-probability tokens",
)
with gr.Column():
with gr.Row():
top_k = gr.Slider(
label="Top-k",
value=50,
minimum=0.0,
maximum=100,
step=1,
interactive=True,
info="Sample from a shortlist of top-k tokens",
)
with gr.Column():
with gr.Row():
max_new_tokens = gr.Slider(
label="Maximum new tokens",
value=256,
minimum=0,
maximum=2048,
step=5,
interactive=True,
info="The maximum number of new tokens to generate",
)
with gr.Row():
submit = gr.Button("Generate Answers")
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown("**Falcon 7B instruct**")
output_falcon = gr.Markdown()
with gr.Column():
with gr.Box():
gr.Markdown("**LLaMA 7B instruct**")
output_llama = gr.Markdown()
with gr.Row():
gr.Examples(
examples=examples,
inputs=[instruction],
cache_examples=False,
fn=process_example,
outputs=[output_falcon, output_llama],
)
submit.click(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens], outputs=[output_falcon, output_llama ])
instruction.submit(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens ], outputs=[output_falcon, output_llama])
demo.queue(concurrency_count=16).launch(debug=True)