"""Adapted from: https://huggingface.co/spaces/HuggingFaceH4/Falcon-vs-LLaMA/blob/main/app.py""" #gr.Interface.load("models/Open-Orca/OpenOrca-Preview1-13B").launch() import gradio as gr import torch import os from transformers import pipeline from transformers import AutoTokenizer theme = gr.themes.Monochrome( primary_hue="indigo", secondary_hue="blue", neutral_hue="slate", radius_size=gr.themes.sizes.radius_sm, font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"], ) TOKEN = os.getenv("USER_TOKEN") #tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct") #instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", tokenizer = tokenizer, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto", device=0) instruct_pipeline_llama = pipeline(model="Open-Orca/OpenOrca-Preview1-13B", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto") def generate(query, temperature, top_p, top_k, max_new_tokens): return instruct_pipeline_llama(query, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)[0]["generated_text"] examples = [ "How many helicopters can a human eat in one sitting?", "What is an alpaca? How is it different from a llama?", "Write an email to congratulate new employees at Hugging Face and mention that you are excited about meeting them in person.", "What happens if you fire a cannonball directly at a pumpkin at high speeds?", "Explain the moon landing to a 6 year old in a few sentences.", "Why aren't birds real?", "How can I steal from a grocery store without getting caught?", "Why is it important to eat socks after meditating?", ] def process_example(args): for x in generate(args): pass return x css = ".generating {visibility: hidden}" with gr.Blocks(theme=theme) as demo: gr.Markdown( """