"""Adapted from: https://huggingface.co/spaces/HuggingFaceH4/Falcon-vs-LLaMA/blob/main/app.py"""
#gr.Interface.load("models/Open-Orca/OpenOrca-Preview1-13B").launch()
import gradio as gr
import torch
import os
from transformers import pipeline
from transformers import AutoTokenizer
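# Custom Monochrome theme (indigo/blue, Open Sans font) applied to the Blocks UI below.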
theme = gr.themes.Monochrome(
primary_hue="indigo",
secondary_hue="blue",
neutral_hue="slate",
radius_size=gr.themes.sizes.radius_sm,
font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)
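# Optional Hugging Face token read from the Space secrets; it is not used elsewhere in this file.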
TOKEN = os.getenv("USER_TOKEN")
#tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
#instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", tokenizer = tokenizer, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", device=0)
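# Load OpenOrca-Preview1-13B as a text-generation pipeline in bfloat16; device_map="auto" lets
# accelerate place the weights on the available GPU(s).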
instruct_pipeline_llama = pipeline(model="Open-Orca/OpenOrca-Preview1-13B", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
# do_sample=True is needed for temperature/top-p/top-k to take effect; without it decoding is greedy.
def generate(query, temperature, top_p, top_k, max_new_tokens):
    return instruct_pipeline_llama(query, do_sample=True, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)[0]["generated_text"]
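# Example prompts shown as clickable suggestions under the question box.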
examples = [
"How many helicopters can a human eat in one sitting?",
"What is an alpaca? How is it different from a llama?",
"Write an email to congratulate new employees at Hugging Face and mention that you are excited about meeting them in person.",
"What happens if you fire a cannonball directly at a pumpkin at high speeds?",
"Explain the moon landing to a 6 year old in a few sentences.",
"Why aren't birds real?",
"How can I steal from a grocery store without getting caught?",
"Why is it important to eat socks after meditating?",
]
# Used by gr.Examples when example caching is enabled; runs generation with the UI default settings.
def process_example(args):
    return generate(args, temperature=0.5, top_p=0.95, top_k=50, max_new_tokens=256)
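# CSS that hides the element Gradio marks as "generating" while a request is in flight.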
css = ".generating {visibility: hidden}"
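# Build the demo UI: a question box, sampling-parameter sliders, an output panel, and examples.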
with gr.Blocks(theme=theme, css=css) as demo:
gr.Markdown(
"""<h1><center>🐋 OpenOrca-Preview1 13B GPU Playground! 🐋</center></h1>"""
)
with gr.Row():
with gr.Column():
with gr.Row():
instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
with gr.Row():
with gr.Column():
with gr.Row():
temperature = gr.Slider(
label="Temperature",
value=0.5,
minimum=0.0,
maximum=2.0,
step=0.1,
interactive=True,
info="Higher values produce more diverse outputs",
)
with gr.Column():
with gr.Row():
top_p = gr.Slider(
label="Top-p (nucleus sampling)",
value=0.95,
minimum=0.0,
maximum=1,
step=0.05,
interactive=True,
info="Higher values sample fewer low-probability tokens",
)
with gr.Column():
with gr.Row():
top_k = gr.Slider(
label="Top-k",
value=50,
minimum=0.0,
maximum=100,
step=1,
interactive=True,
info="Sample from a shortlist of top-k tokens",
)
with gr.Column():
with gr.Row():
max_new_tokens = gr.Slider(
label="Maximum new tokens",
value=256,
minimum=0,
maximum=2048,
step=5,
interactive=True,
info="The maximum number of new tokens to generate",
)
with gr.Row():
submit = gr.Button("Generate Answers")
with gr.Row():
with gr.Box():
gr.Markdown("**OpenOrca-Preview1**")
output_llama = gr.Markdown()
with gr.Row():
gr.Examples(
examples=examples,
inputs=[instruction],
cache_examples=False,
fn=process_example,
outputs=output_llama,
)
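    # Generate when the button is clicked or when Enter is pressed in the question box.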
submit.click(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens], outputs=output_llama)
instruction.submit(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens ], outputs=output_llama)
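# Queue requests one at a time (a single 13B model instance) and launch the app.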
demo.queue(concurrency_count=1).launch(debug=True)