"""Gradio chat UI backed by a GGUF LLaMA model served through llama-cpp-python."""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Model identifier from Hugging Face
model_repo = "Mat17892/lora_llama_gguf_g14"  # Hugging Face model ID

# Context window given to the model, and how much of it is reserved for the reply.
N_CTX = 2048
MAX_NEW_TOKENS = 256
PROMPT_TOKEN_BUDGET = N_CTX - MAX_NEW_TOKENS

# The prompt format marks each user turn with "User:"; stopping there keeps the
# model from writing the user's next message itself.
STOP_SEQUENCES = ["User:"]

# Download the GGUF file from Hugging Face
model_path = hf_hub_download(repo_id=model_repo, filename="llama_lora_model.gguf")

# Load the GGUF model using llama-cpp-python
print("Loading model...")
llm = Llama(model_path=model_path, n_ctx=N_CTX, n_threads=8)  # Adjust threads as needed
print("Model loaded!")


def _build_prompt(turns, user_input):
    """Render previous (user, ai) turns plus the new user message as one prompt."""
    parts = [f"User: {user}\nAI: {ai}\n" for user, ai in turns]
    parts.append(f"User: {user_input}\nAI:")
    return "".join(parts)


# Chat function
def chat_with_model(user_input, chat_history):
    """
    Process user input and generate a response from the model.

    Blank input is ignored and the history is returned unchanged. When the
    rendered prompt would not fit in the context window, the oldest turns are
    left out of the prompt; the returned history still contains every turn.

    :param user_input: User's input string
    :param chat_history: List of [user_message, ai_response] pairs
    :return: Tuple ``(chat_history, chat_history)`` with the new exchange
        appended; one copy feeds the Chatbot display, the other the State.
    """
    # Nothing to answer: skip the (expensive) model call entirely.
    if not user_input or not user_input.strip():
        return chat_history, chat_history

    # Combine chat history into a single prompt, dropping the oldest turns
    # until the prompt leaves room for MAX_NEW_TOKENS of reply.
    turns = list(chat_history)
    prompt = _build_prompt(turns, user_input)
    while turns and len(llm.tokenize(prompt.encode("utf-8"))) > PROMPT_TOKEN_BUDGET:
        turns.pop(0)
        prompt = _build_prompt(turns, user_input)

    # Generate response from the model. max_tokens is explicit because the
    # library default (16) truncates replies after a few words.
    completion = llm(prompt, max_tokens=MAX_NEW_TOKENS, stop=STOP_SEQUENCES)
    response = completion["choices"][0]["text"].strip()

    # Build a new list rather than mutating the state object in place.
    updated_history = list(chat_history) + [(user_input, response)]
    return updated_history, updated_history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA GGUF Chatbot")
    chatbot = gr.Chatbot(label="Chat with the GGUF Model")

    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
        with gr.Column(scale=1):
            submit_btn = gr.Button("Send")

    chat_history = gr.State([])

    # Link components
    submit_btn.click(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )

# Launch the app
demo.launch()