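# NOTE: a "Build error" on Spaces with this app is usually a dependency problem
# rather than a code problem. A minimal requirements.txt sketch (package names
# assumed from the imports below; pin versions as needed):
#
#     gradio
#     huggingface_hub
#     llama-cpp-python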
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- 1. MODEL LOADING ---
# We still load the quantized GGUF model, which is perfect for CPU.
# We focus on Llama-3, as it's a strong fit for a general-purpose assistant.
model_name_or_path = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
model_file = "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf"

try:
    model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_file)
except Exception as e:
    raise RuntimeError(f"Failed to download the model. Error: {e}")

# Load the model with llama-cpp-python.
# n_ctx is the context window size; 2048 is a safe bet for CPU Spaces.
# n_gpu_layers=0 ensures inference runs entirely on the CPU.
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=4,  # A reasonable thread count for a CPU Space
        n_gpu_layers=0,
        verbose=False,
    )
except Exception as e:
    raise RuntimeError(f"Failed to load the GGUF model. Error: {e}")
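# Optional smoke test (a minimal sketch, left commented out so it doesn't run
# on every boot): confirm the model responds before wiring up the UI.
#
#     result = llm.create_chat_completion(
#         messages=[{"role": "user", "content": "Say 'ready' and nothing else."}],
#         max_tokens=8,
#     )
#     print(result["choices"][0]["message"]["content"])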
# --- 2. THE "BRAIN'S INSTRUCTION MANUAL" (SYSTEM PROMPT) ---
# This is the most critical part: we tell the AI how to behave.
# This prompt guides it to be helpful, analytical, and honest about its limitations.
SYSTEM_PROMPT = """You are 'NexusAI', a helpful and highly intelligent AI assistant built by a creative developer.

Your primary goal is to provide comprehensive, insightful, and helpful responses. You must be robust and handle any user input, no matter how brief or poorly phrased.

When a user asks a question, follow these steps:
1. **Analyze the Intent:** First, understand the user's *true* goal. If they ask "cost to build building?", they don't want you to invent a number; they need a *checklist* of cost categories to research. If their question is vague, identify what they are likely trying to accomplish.
2. **Provide a Direct Answer:** If you can answer directly, do so clearly and concisely.
3. **Elaborate and Add Value:** After the direct answer, provide deeper context, explain the "why" behind the answer, and offer related suggestions or next steps. Give the user more than they asked for.
4. **Acknowledge Limitations:** You are not a real-time calculator, a search engine, or a financial advisor. If a question requires live, real-world data (like prices, stock quotes, or personal advice), you MUST state that you cannot provide it. Instead, give a framework or a list of steps the user can take to find the information themselves. NEVER invent facts.
5. **Maintain a Friendly, Encouraging Tone:** Be a partner in the user's creative or analytical process.
"""
# --- 3. THE GRADIO CHAT INTERFACE ---
def predict(message, history):
    """
    Called by the Gradio ChatInterface for each new message.
    'message' is the new user input.
    'history' is the conversation so far, as a list of [user, assistant] pairs.
    """
    # Convert the Gradio history into the OpenAI-style message list that
    # llama-cpp-python's create_chat_completion() expects.
    chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
    for user_msg, assistant_msg in history:
        chat_history_formatted.append({"role": "user", "content": user_msg})
        chat_history_formatted.append({"role": "assistant", "content": assistant_msg})

    # Add the latest user message
    chat_history_formatted.append({"role": "user", "content": message})

    # Generate a response stream; stream=True lets the text appear
    # token-by-token for a better UX.
    generator = llm.create_chat_completion(
        messages=chat_history_formatted,
        max_tokens=1024,
        temperature=0.7,
        stream=True,
    )

    # Yield the growing partial response to create the streaming effect.
    partial_message = ""
    for chunk in generator:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial_message += delta["content"]
            yield partial_message
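# Note: the loop above assumes Gradio's default tuple-style history. If this
# Space pins a newer Gradio and passes type="messages" to ChatInterface,
# 'history' already arrives as OpenAI-style role/content dicts, and the
# formatting step would simplify to (sketch, untested here):
#
#     chat_history_formatted = [{"role": "system", "content": SYSTEM_PROMPT}]
#     chat_history_formatted += history
#     chat_history_formatted.append({"role": "user", "content": message})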
# We use gr.ChatInterface, which creates a complete chat UI for us.
# It manages history, input boxes, and message display automatically.
gr.ChatInterface(
    fn=predict,
    title="🤖 NexusAI Assistant",
    description="A powerful, conversational AI running on a Hugging Face CPU. Ask me anything!",
    examples=[
        ["How do I learn to code?"],
        ["Explain the concept of 'supply and demand' like I'm five."],
        ["I want to build a PC, where do I start?"],
        ["I am building a building, how much would it cost me"],  # The "bad" prompt from before!
    ],
    theme="soft",
).launch()