import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# LoRA adapter repository on Hugging Face
adapter_repo = "Mat17892/llama_lora_gguf"

# Download the GGUF LoRA adapter from Hugging Face
lora_adapter_path = hf_hub_download(repo_id=adapter_repo, filename="llama_lora_adapter.gguf")

# Download the base model GGUF file
base_model_repo = "unsloth/Llama-3.2-3B-Instruct-GGUF"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename="Llama-3.2-3B-Instruct-Q8_0.gguf")

# Load the base model and apply the LoRA adapter.
# llama-cpp-python applies a LoRA adapter at load time via the `lora_path`
# argument of the Llama constructor (there is no separate load_adapter method).
print("Loading base model and applying LoRA adapter...")
llm = Llama(
    model_path=base_model_path,
    lora_path=lora_adapter_path,
    n_ctx=2048,
    n_threads=8,
)
print("Model ready with LoRA adapter!")


# Chat function
def chat_with_model(user_input, chat_history):
    """
    Process user input and generate a response from the model.

    :param user_input: User's input string
    :param chat_history: List of (user_message, ai_response) pairs
    :return: Updated chat history, returned twice (for the Chatbot display and the State)
    """
    # Construct the prompt from the chat history
    prompt = ""
    for user, ai in chat_history:
        prompt += f"User: {user}\nAI: {ai}\n"
    prompt += f"User: {user_input}\nAI:"  # Add the latest user input

    # Generate a response from the model.
    # max_tokens avoids the small default completion length; stop keeps the
    # model from continuing the conversation on the user's behalf.
    raw_response = llm(prompt, max_tokens=256, stop=["User:"])["choices"][0]["text"].strip()

    # Clean the response (remove any trailing turns the model may have generated)
    response = raw_response.split("User:")[0].strip()

    # Update the chat history with the new turn
    chat_history.append((user_input, response))
    return chat_history, chat_history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA GGUF Chatbot")

    chatbot = gr.Chatbot(label="Chat with the GGUF Model")
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
        with gr.Column(scale=1):
            submit_btn = gr.Button("Send")

    chat_history = gr.State([])

    # Link components
    submit_btn.click(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )

# Launch the app
demo.launch()