import torch
import gradio as gr
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Base model and your LoRA adapter
base_model = "mistralai/Mistral-7B-Instruct-v0.1"
adapter_repo = "gaurav2003/room-service-chatbot"

# Load tokenizer and base model
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Load your LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model, adapter_repo)
model.eval()

# Chat function: rebuild the conversation in the model's instruction
# format, generate, and decode only the newly generated tokens so the
# reply does not echo the prompt back to the user
def generate_response(message, history):
    # gr.ChatInterface passes history as (user, assistant) pairs by default
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # apply_chat_template wraps the turns in Mistral's [INST] ... [/INST] format
    input_ids = tokenizer.apply_chat_template(
        messages, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(input_ids, max_new_tokens=512)

    # Slice off the prompt tokens before decoding
    response = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return response

# Gradio interface
chatbot = gr.ChatInterface(fn=generate_response, title="Room Service Chatbot")

if __name__ == "__main__":
    chatbot.launch()
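
# Assumed local setup (inferred from the imports above, not pinned by the
# source): pip install torch transformers peft gradio accelerate
# (accelerate is required for device_map="auto"). Then run `python app.py`;
# Gradio prints a local URL serving the chat UI.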