import gc

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer

# Load the model
model = "tiiuae/falcon-7b-instruct"
instruction = (
    "Draft an apology email to a customer who experienced a delay in their order "
    "and provide reassurance that the issue has been resolved"
)

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)


def predict(instruction: str):
    """
    Generate a response to an instruction with the pre-trained language model.

    :param instruction: The input to generate a prediction for. It can be a question,
        a prompt, or any other text the model should respond to.
    :type instruction: str
    :return: A string containing the text generated by the model.
    """
    sequences = pipeline(
        instruction,
        max_length=500,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    result = ""
    for seq in sequences:
        result = f"Result: {seq['generated_text']}"

    # Release memory between requests
    gc.collect()
    torch.cuda.empty_cache()

    return result


gr.Interface(
    predict,
    inputs=[gr.Textbox(lines=2, value=instruction, label="Instruction")],
    outputs=[gr.Textbox(label="Output")],
    title="XGen",
).launch()