# Importing required libraries
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import torch

# Defining the pipeline and the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
model = AutoModelForCausalLM.from_pretrained(
    "huggyllama/llama-7b",
    device_map="auto",
    offload_folder="offload",
    torch_dtype=torch.float16,
)
# device_map="auto" already places the weights, so no explicit .to(device) call is needed
# model = model.to(device)

print("***")
print("Loaded tokenizer and model")
print(device)
print("***")

pipe_flan = pipeline("text-generation", model=model, tokenizer=tokenizer)

print("***")
print("Created pipeline")
print("***")

# Text generation
def generator(input):
    output = pipe_flan(input, max_length=50, num_return_sequences=1)
    return output[0]["generated_text"]

# Creating the Gradio Interface
# (gr.inputs / gr.outputs are deprecated; gr.Textbox works for both sides)
demo = gr.Interface(
    fn=generator,
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
)

host, port = "0.0.0.0", 7860
print("***")
print(f"Set up interface. Hosting now on {host}:{port}")
print("***")

# Launching the Gradio Interface
demo.launch(server_name=host, server_port=port)
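
# Usage sketch (an assumption, not part of the original script): once the app
# above is running, the endpoint can be exercised from a separate Python
# process with the gradio_client package, for example:
#
#     from gradio_client import Client
#     client = Client("http://localhost:7860")
#     print(client.predict("Once upon a time", api_name="/predict"))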