"""Gradio chat UI for a LoRA-finetuned Llama-3 model served via LLaMA-Factory.

Loads the 4-bit quantized base model with the "AI-Guru" LoRA adapter and
exposes a simple single-turn Gradio interface that keeps the running
conversation in a ``gr.State`` history list.
"""

from llamafactory.chat import ChatModel
import gradio as gr

# Step 1: Load the base model plus the LoRA adapter.
args = dict(
    model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit",
    adapter_name_or_path="enzer1992/AI-Guru",
    template="llama3",
    finetuning_type="lora",
    quantization_bit=4,
    device="cpu",  # Forces CPU usage
)
chat_model = ChatModel(args)


# Step 2: Create a function for chatting.
def chat(user_input, history):
    """Generate one assistant reply and extend the conversation history.

    Args:
        user_input: The user's latest message text.
        history: List of ``{"role": ..., "content": ...}`` message dicts,
            or ``None`` on the very first turn (``gr.State()`` default).

    Returns:
        A ``(response, history)`` tuple: the assistant's full reply and the
        history list extended in place with the new user/assistant turns.
    """
    # BUGFIX: gr.State() starts out as None, which made `history + [...]`
    # raise a TypeError on the first message. Normalize to a list first.
    history = history or []
    messages = history + [{"role": "user", "content": user_input}]

    # Accumulate streamed chunks in a list and join once, instead of
    # repeated string += which is quadratic in the worst case.
    chunks = []
    for new_text in chat_model.stream_chat(messages):
        chunks.append(new_text)
    response = "".join(chunks)

    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": response})
    return response, history


# Step 3: Create a simple interface.
iface = gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(label="Your Message"), gr.State()],
    outputs=[gr.Textbox(label="AI Response"), gr.State()],
    title="AI Guru Chatbot",
)

if __name__ == "__main__":
    iface.launch()