"""Gradio chat UI for a LoRA-finetuned Llama-3 model served via LLaMA-Factory.

Loads the 4-bit quantized base model with the "AI-Guru" LoRA adapter and
exposes a simple single-turn Gradio interface that keeps the running
conversation in a ``gr.State`` history list.
"""

from llamafactory.chat import ChatModel
import gradio as gr

# Step 1: Load the base model plus the LoRA adapter.
args = dict(
    model_name_or_path="unsloth/llama-3-8b-Instruct-bnb-4bit",
    adapter_name_or_path="enzer1992/AI-Guru",
    template="llama3",
    finetuning_type="lora",
    quantization_bit=4,
    device="cpu",  # Forces CPU usage
)
chat_model = ChatModel(args)


# Step 2: Create a function for chatting.
def chat(user_input, history):
    """Generate one assistant reply and extend the conversation history.

    Args:
        user_input: The user's latest message text.
        history: List of ``{"role": ..., "content": ...}`` message dicts,
            or ``None`` on the very first turn (``gr.State()`` default).

    Returns:
        A ``(response, history)`` tuple: the assistant's full reply and the
        history list extended in place with the new user/assistant turns.
    """
    # BUGFIX: gr.State() starts out as None, which made `history + [...]`
    # raise a TypeError on the first message. Normalize to a list first.
    history = history or []
    messages = history + [{"role": "user", "content": user_input}]

    # Accumulate streamed chunks in a list and join once, instead of
    # repeated string += which is quadratic in the worst case.
    chunks = []
    for new_text in chat_model.stream_chat(messages):
        chunks.append(new_text)
    response = "".join(chunks)

    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": response})
    return response, history


# Step 3: Create a simple interface.
iface = gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(label="Your Message"), gr.State()],
    outputs=[gr.Textbox(label="AI Response"), gr.State()],
    title="AI Guru Chatbot",
)

if __name__ == "__main__":
    iface.launch()