"""Gradio demo that samples text from the MPT-7B-Instruct model on a GPU."""

# `spaces` is kept as the first import, matching the original ordering.
import spaces
import gradio as gr
import torch  # required by run(): torch.autocast / torch.bfloat16
import transformers
from transformers import AutoTokenizer
from transformers import pipeline

# The tokenizer is loaded from a different repository than the model weights.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

# trust_remote_code=True lets transformers execute the modelling code that
# ships with the checkpoint repository.
model = transformers.AutoModelForCausalLM.from_pretrained(
    'mosaicml/mpt-7b-instruct',
    trust_remote_code=True
)

pipe = pipeline('text-generation', model=model, tokenizer=tokenizer, device='cuda:0')

INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

# Two-stage template: the first .format() fills in the fixed pieces and
# re-inserts a literal "{instruction}" placeholder, so the result can be
# formatted again later with the actual instruction text.
# NOTE(review): the line breaks inside this literal were reconstructed from a
# whitespace-collapsed copy of the file - confirm against the intended prompt
# layout.
PROMPT_FOR_GENERATION_FORMAT = """{intro}
{instruction_key}
{instruction}
{response_key}
""".format(
    intro=INTRO_BLURB,
    instruction_key=INSTRUCTION_KEY,
    instruction="{instruction}",
    response_key=RESPONSE_KEY,
)

example = "James decides to run 3 sprints 3 times a week. He runs 60 meters each sprint. How many total meters does he run a week? Explain before answering."

# Fully formatted example prompt. It is built here but not passed to the
# pipeline anywhere in this file; run() uses its own hard-coded prompt.
fmt_ex = PROMPT_FOR_GENERATION_FORMAT.format(instruction=example)


@spaces.GPU
def run():
    """Generate a sampled continuation of a fixed recipe prompt.

    Runs the module-level text-generation pipeline under CUDA autocast with
    bfloat16, producing at most 100 new tokens with sampling enabled.

    Returns:
        Whatever the pipeline call returns, unmodified. It is handed straight
        to the output Textbox by the click handler below.
    """
    with torch.autocast('cuda', dtype=torch.bfloat16):
        return pipe(
            'Here is a recipe for vegan banana bread:\n',
            max_new_tokens=100,
            do_sample=True,
            use_cache=True,
        )


with gr.Blocks() as app:
    btn = gr.Button()
    outp = gr.Textbox()
    # No input components: run() takes no arguments.
    btn.click(run, None, outp)

app.launch()