import gc

import gradio as gr
import torch
import transformers
from transformers import AutoTokenizer

# Load the model
model = "tiiuae/falcon-7b-instruct"
instruction = (
    "Draft an apology email to a customer who experienced a delay in their order "
    "and provide reassurance that the issue has been resolved"
)

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)


def predict(instruction: str):
    """
    Generate a response to an instruction with the pre-trained language model.

    :param instruction: The input to generate a prediction for. It can be a question,
        a prompt, or any other text the model should respond to.
    :type instruction: str
    :return: A string containing the text generated by the model.
    """
    sequences = pipeline(
        instruction,
        max_length=500,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

    result = ""
    for seq in sequences:
        result = f"Result: {seq['generated_text']}"

    # Release memory between requests
    gc.collect()
    torch.cuda.empty_cache()

    return result


gr.Interface(
    predict,
    inputs=[gr.Textbox(lines=2, value=instruction, label="Instruction")],
    outputs=[gr.Textbox(label="Output")],
    title="XGen",
).launch()