fix: Add workaround for incorrect base model path
3a3e728
import gradio as gr
import torch
from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig
import requests
import json
from peft import PeftModel
from threading import Thread
import os
# --- Configuration ---
# The model is loaded from the Hugging Face Hub
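# This repo is assumed to hold the tokenizer, config, and PEFT adapter weights;
# the underlying base model is DeepSeek-Coder-V2-Lite-Instruct (see the config
# workaround below).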
BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
# Name of the environment variable for the Hugging Face token
HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"
# --- Model Loading ---
# Get the Hugging Face token from environment variables
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)
if not hf_token:
raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.")
# Load the tokenizer from the Hub, using the token for private models
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
# Load the config from the user's repo
config = AutoConfig.from_pretrained(BASE_MODEL_PATH, token=hf_token)
# Correct the base model path in the config
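# The config shipped with the adapter repo records a local/incorrect path as its
# base model, so anything that later resolves the base model by name would fail;
# pointing _name_or_path at the public DeepSeek checkpoint is the workaround
# referenced in the commit message above.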
config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
# Load the base model from the Hub using the corrected config
base_model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL_PATH,
config=config,
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.bfloat16,
    token=hf_token
)
# Move model to the appropriate device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)
# The PEFT model is loaded from the same repository.
# PeftModel will automatically find the adapter configuration.
model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, token=hf_token)
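# Put the adapter-wrapped model in evaluation mode (disables dropout) for inference.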
model.eval()
# --- Web Search Function ---
def search(query):
"""Performs a web search using the Serper API."""
serper_api_key = os.environ.get("SERPER_API_KEY")
if not serper_api_key:
return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets."
url = "https://google.serper.dev/search"
payload = json.dumps({"q": query})
headers = {
'X-API-KEY': serper_api_key,
'Content-Type': 'application/json'
}
try:
        # Use a timeout so a slow or unreachable search API cannot hang the request indefinitely.
        response = requests.post(url, headers=headers, data=payload, timeout=15)
response.raise_for_status()
results = response.json()
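        # Serper responds with JSON whose 'organic' key lists the ranked web
        # results (each typically has 'title', 'link', and 'snippet' fields);
        # fall back to an empty list if the key is absent.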
return results.get('organic', [])
except requests.exceptions.RequestException as e:
return f"Error during web search: {e}"
# --- Response Generation ---
def generate_response(message, history):
"""Generates a response from the model, with optional web search."""
# Handle web search command
if message.lower().startswith("search for "):
search_query = message[len("search for "):]
search_results = search(search_query)
if isinstance(search_results, str): # Error case
yield search_results
return
if not search_results:
yield "No search results found."
return
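        # Build a context string from the snippets of the top five organic results.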
context = " ".join([res.get('snippet', '') for res in search_results[:5]])
# Prepend context to the user's message
message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}"
# Format chat history and new message using the tokenizer's chat template
chat_messages = []
for user_msg, assistant_msg in history:
chat_messages.append({"role": "user", "content": user_msg})
if assistant_msg:
chat_messages.append({"role": "assistant", "content": assistant_msg})
chat_messages.append({"role": "user", "content": message})
# Apply the chat template
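    # add_generation_prompt=True appends the assistant-turn marker so the model
    # responds as the assistant instead of continuing the user's message.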
prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
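    # TextIteratorStreamer yields decoded text chunks as generate() produces
    # tokens on the background thread; skip_prompt hides the echoed input prompt.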
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
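    # `inputs` is a dict-like BatchEncoding, so dict(inputs, ...) forwards
    # input_ids and attention_mask to generate() alongside the streamer settings.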
# Run generation in a separate thread
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
# Yield generated text as it becomes available
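    # Gradio's ChatInterface replaces the bot message with each yielded value,
    # so the accumulated text is yielded rather than per-token deltas.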
generated_text = ""
for new_text in streamer:
generated_text += new_text
yield generated_text
# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo:
gr.HTML("<h1 align='center'>Zenith V1.1 Coder</h1>")
gr.Markdown("This Space is running [zenith_coder_v1.1](https://huggingface.co/algorythmtechnologies/zenith_coder_v1.1).<br>You can ask coding questions or use the 'search for <query>' command to browse the web.")
gr.ChatInterface(
generate_response,
chatbot=gr.Chatbot(
height=600,
avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
bubble_full_width=False,
),
textbox=gr.Textbox(
placeholder="Ask me a question or type 'search for <your query>'...",
container=False,
scale=7,
),
submit_btn="Send",
retry_btn=None,
undo_btn=None,
clear_btn="Clear History",
)
if __name__ == "__main__":
# Before launching, remind the user to set the token if it's not found.
if not os.environ.get(HF_TOKEN_ENV_VAR):
print(f"CRITICAL: Environment variable {HF_TOKEN_ENV_VAR} not found.")
print("Please set this as a secret in your Hugging Face Space settings.")
demo.launch(share=True)