import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick a device: use the GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer once at startup (the original loaded them twice).
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
model.to(device)


def greet(text):
    # Seed the conversation with a short example exchange, then append the
    # user's input from the Gradio textbox as the final turn (the original
    # hardcoded this last message and ignored `text`).
    messages = [
        {"role": "user", "content": "What is your favourite condiment?"},
        {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
        {"role": "user", "content": text},
    ]

    # apply_chat_template wraps the messages in the model's chat markup and
    # returns the token IDs as a tensor.
    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)

    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
    decoded = tokenizer.batch_decode(generated_ids)
    return decoded[0]


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()