import spaces
import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer
from transformers import pipeline

# The tokenizer is loaded from a different checkpoint than the model weights.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

# NOTE(review): trust_remote_code=True lets Transformers execute modeling code
# shipped with the checkpoint repository; only keep it for repos you trust.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b-instruct",
    trust_remote_code=True,
)

# Text-generation pipeline bound to the first CUDA device; used by run().
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device="cuda:0",
)

# Section markers and preamble used to build the instruction-style prompt.
INSTRUCTION_KEY = "### Instruction:"
RESPONSE_KEY = "### Response:"
INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

# One prompt section per line, with a trailing newline after the response
# marker. The literal "{instruction}" is left as the only placeholder so the
# template can be filled in later with str.format(instruction=...).
PROMPT_FOR_GENERATION_FORMAT = "\n".join(
    [
        INTRO_BLURB,
        INSTRUCTION_KEY,
        "{instruction}",
        RESPONSE_KEY,
        "",
    ]
)

# Sample instruction and its fully formatted prompt.
example = "James decides to run 3 sprints 3 times a week. He runs 60 meters each sprint. How many total meters does he run a week? Explain before answering."
fmt_ex = PROMPT_FOR_GENERATION_FORMAT.format_map({"instruction": example})

@spaces.GPU
def run():
    """Generate a sampled continuation of a fixed banana-bread recipe prompt.

    The call is made through the module-level ``pipe`` text-generation
    pipeline inside a CUDA autocast region using bfloat16.

    Returns:
        Whatever ``pipe`` returns for the prompt, unmodified.
        NOTE(review): for a Transformers text-generation pipeline this is
        expected to be a list of dicts carrying a ``generated_text`` entry --
        confirm against the installed version before post-processing it.
    """
    # NOTE(review): @spaces.GPU presumably requests a GPU for the duration of
    # the call on Hugging Face ZeroGPU hardware -- confirm with the Spaces docs.
    with torch.autocast('cuda', dtype=torch.bfloat16):
        # Sampling is enabled, so repeated calls yield different text; at most
        # 100 new tokens are generated per call.
        return pipe(
            'Here is a recipe for vegan banana bread:\n',
            max_new_tokens=100,
            do_sample=True,
            use_cache=True,
        )


# Minimal UI: a single button that triggers generation and a textbox for the
# result.
with gr.Blocks() as app:
    btn = gr.Button()
    outp = gr.Textbox()
    # run() takes no arguments, so no input components are wired; its return
    # value is routed to the textbox.
    btn.click(fn=run, inputs=None, outputs=outp)

# Start the Gradio server as soon as the script is executed.
app.launch()