fix: Add workaround for incorrect base model path
3a3e728
import gradio as gr
import torch
from transformers import AutoTokenizer, TextIteratorStreamer, AutoModelForCausalLM, AutoConfig
import requests
import json
from peft import PeftModel
from threading import Thread
import os
# --- Configuration ---
# The model is loaded from the Hugging Face Hub
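# This repo is assumed to hold the tokenizer, config, and PEFT adapter weights;
# the underlying base model is DeepSeek-Coder-V2-Lite-Instruct (see the config
# workaround below).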
BASE_MODEL_PATH = "algorythmtechnologies/zenith_coder_v1.1"
# Name of the environment variable for the Hugging Face token
HF_TOKEN_ENV_VAR = "HUGGING_FACE_HUB_TOKEN"
# --- Model Loading ---
# Get the Hugging Face token from environment variables
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)
if not hf_token:
raise ValueError(f"Environment variable {HF_TOKEN_ENV_VAR} not set. Please set it in your Space secrets.")
# Load the tokenizer from the Hub, using the token for private models
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_PATH, token=hf_token)
# Load the config from the user's repo
config = AutoConfig.from_pretrained(BASE_MODEL_PATH, token=hf_token)
# Correct the base model path in the config
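# The config shipped with the adapter repo records a local/incorrect path as its
# base model, so anything that later resolves the base model by name would fail;
# pointing _name_or_path at the public DeepSeek checkpoint is the workaround
# referenced in the commit message above.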
config._name_or_path = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
# Load the base model from the Hub using the corrected config
base_model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL_PATH,
config=config,
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.bfloat16,
    token=hf_token
)
# Move model to the appropriate device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model.to(device)
# The PEFT model is loaded from the same repository.
# PeftModel will automatically find the adapter configuration.
model = PeftModel.from_pretrained(base_model, BASE_MODEL_PATH, token=hf_token)
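# Put the adapter-wrapped model in evaluation mode (disables dropout) for inference.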
model.eval()
# --- Web Search Function ---
def search(query):
"""Performs a web search using the Serper API."""
serper_api_key = os.environ.get("SERPER_API_KEY")
if not serper_api_key:
return "SERPER_API_KEY not found. Please set it as an environment variable in your Hugging Face Space secrets."
url = "https://google.serper.dev/search"
payload = json.dumps({"q": query})
headers = {
'X-API-KEY': serper_api_key,
'Content-Type': 'application/json'
}
try:
        # Use a timeout so a slow or unreachable search API cannot hang the request indefinitely.
        response = requests.post(url, headers=headers, data=payload, timeout=15)
response.raise_for_status()
results = response.json()
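        # Serper responds with JSON whose 'organic' key lists the ranked web
        # results (each typically has 'title', 'link', and 'snippet' fields);
        # fall back to an empty list if the key is absent.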
return results.get('organic', [])
except requests.exceptions.RequestException as e:
return f"Error during web search: {e}"
# --- Response Generation ---
def generate_response(message, history):
"""Generates a response from the model, with optional web search."""
# Handle web search command
if message.lower().startswith("search for "):
search_query = message[len("search for "):]
search_results = search(search_query)
if isinstance(search_results, str): # Error case
yield search_results
return
if not search_results:
yield "No search results found."
return
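        # Build a context string from the snippets of the top five organic results.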
context = " ".join([res.get('snippet', '') for res in search_results[:5]])
# Prepend context to the user's message
message = f"Based on the following search results, answer the user's query.\nContext: {context}\n\nUser Query: {message}"
# Format chat history and new message using the tokenizer's chat template
chat_messages = []
for user_msg, assistant_msg in history:
chat_messages.append({"role": "user", "content": user_msg})
if assistant_msg:
chat_messages.append({"role": "assistant", "content": assistant_msg})
chat_messages.append({"role": "user", "content": message})
# Apply the chat template
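    # add_generation_prompt=True appends the assistant-turn marker so the model
    # responds as the assistant instead of continuing the user's message.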
prompt = tokenizer.apply_chat_template(chat_messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(device)
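    # TextIteratorStreamer yields decoded text chunks as generate() produces
    # tokens on the background thread; skip_prompt hides the echoed input prompt.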
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
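    # `inputs` is a dict-like BatchEncoding, so dict(inputs, ...) forwards
    # input_ids and attention_mask to generate() alongside the streamer settings.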
# Run generation in a separate thread
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
# Yield generated text as it becomes available
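    # Gradio's ChatInterface replaces the bot message with each yielded value,
    # so the accumulated text is yielded rather than per-token deltas.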
generated_text = ""
for new_text in streamer:
generated_text += new_text
yield generated_text
# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky", secondary_hue="blue")) as demo:
gr.HTML("<h1 align='center'>Zenith V1.1 Coder</h1>")
gr.Markdown("This Space is running [zenith_coder_v1.1](https://huggingface.co/algorythmtechnologies/zenith_coder_v1.1).<br>You can ask coding questions or use the 'search for <query>' command to browse the web.")
gr.ChatInterface(
generate_response,
chatbot=gr.Chatbot(
height=600,
avatar_images=(None, "https://i.imgur.com/9kAC4pG.png"),
bubble_full_width=False,
),
textbox=gr.Textbox(
placeholder="Ask me a question or type 'search for <your query>'...",
container=False,
scale=7,
),
submit_btn="Send",
retry_btn=None,
undo_btn=None,
clear_btn="Clear History",
)
if __name__ == "__main__":
# Before launching, remind the user to set the token if it's not found.
if not os.environ.get(HF_TOKEN_ENV_VAR):
print(f"CRITICAL: Environment variable {HF_TOKEN_ENV_VAR} not found.")
print("Please set this as a secret in your Hugging Face Space settings.")
demo.launch(share=True)