|
import gradio as gr |
|
from llama_cpp import Llama |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Hugging Face repo that hosts the GGUF export of the LoRA-tuned LLaMA model.
model_repo = "Mat17892/lora_llama_gguf_g14"




# Download the GGUF weights (or reuse the local HF cache if already present).
model_path = hf_hub_download(repo_id=model_repo, filename="llama_lora_model.gguf")




# Load the model once at import time so every chat request reuses the same
# instance. n_ctx=2048 caps the prompt+completion context window;
# n_threads=8 is the CPU thread count — tune to the host machine.
print("Loading model...")

llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8)

print("Model loaded!")
|
|
|
|
|
def chat_with_model(user_input, chat_history):
    """
    Generate a model reply for *user_input* given the prior conversation.

    :param user_input: The user's latest message (plain string).
    :param chat_history: List of (user_message, ai_response) pairs from
        previous turns; mutated in place with the new turn.
    :return: Tuple of (chat_history, chat_history) — the same updated list
        twice, matching the two Gradio outputs (chatbot display, state).
    """
    # Re-serialize the whole conversation into the flat
    # "User: ...\nAI: ..." prompt format the model expects.
    turns = [f"User: {user}\nAI: {ai}\n" for user, ai in chat_history]
    turns.append(f"User: {user_input}\nAI:")
    prompt = "".join(turns)

    # max_tokens: llama-cpp-python defaults to only 16 tokens, which
    # silently truncates replies — allow a full-length answer instead.
    # stop: keep the model from generating the next "User:" turn itself.
    result = llm(prompt, max_tokens=256, stop=["User:"])
    response = result["choices"][0]["text"].strip()

    chat_history.append((user_input, response))
    return chat_history, chat_history
|
|
|
|
|
# Build the chat UI: a chatbot panel, a message textbox, and a Send button.
with gr.Blocks() as demo:
    gr.Markdown("# 🦙 LLaMA GGUF Chatbot")
    chatbot = gr.Chatbot(label="Chat with the GGUF Model")

    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
        with gr.Column(scale=1):
            submit_btn = gr.Button("Send")

    # Per-session conversation state: list of (user, ai) pairs.
    chat_history = gr.State([])

    # Wire BOTH the Send button and the textbox's Enter key to the same
    # handler — previously only the button was wired, so pressing Enter
    # in the textbox did nothing.
    submit_btn.click(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )
    user_input.submit(
        chat_with_model,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
        show_progress=True,
    )


demo.launch()
|
|