import gradio as gr
from huggingface_hub import InferenceClient, HfHubHTTPError
import os
import re
import traceback
# --- Configuration ---
API_TOKEN = os.getenv("HF_TOKEN", None)
# MODEL = "Qwen/Qwen3-32B" # This is a very large model, might require specific inference endpoint/hardware
# Let's try a smaller, generally available model for testing first, e.g., Mixtral
# You can change this back if you are sure Qwen3-32B is available and configured for your space/token
# MODEL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# Or uncomment the Qwen model if you are certain it's correctly set up for inference:
MODEL = "Qwen/Qwen3-32B"
# i have used Qwen3 because its quiet compatible
# --- Hugging Face Client Initialization ---
print("--- App Start ---")
if not API_TOKEN:
    print("Warning: HF_TOKEN environment variable not set. Using anonymous access.")
    print("Certain models might require a token for access.")
else:
    print(f"HF_TOKEN found (length={len(API_TOKEN)}).")  # Don't print the token itself

try:
    print(f"Initializing Inference Client for model: {MODEL}")
    # Explicitly pass token=None if not found, though InferenceClient handles it.
    client = InferenceClient(model=MODEL, token=API_TOKEN if API_TOKEN else None)
    print("Inference Client Initialized Successfully.")
    # Optional: add a quick test call if feasible, but be mindful of potential costs/rate limits
    # try:
    #     client.text_generation("test", max_new_tokens=1)
    #     print("Test generation successful.")
    # except Exception as test_e:
    #     print(f"Warning: Test generation failed. Client might be initialized but model access could be problematic. Error: {test_e}")
except HfHubHTTPError as http_err:
    # More specific handling for HTTP errors (e.g. 401 Unauthorized, 403 Forbidden, 404 Not Found)
    error_message = (
        f"Failed to initialize model client for {MODEL} due to an HTTP error.\n"
        f"Status Code: {http_err.response.status_code}\n"
        f"Error: {http_err}\n"
        f"Check:\n"
        f"1. If '{MODEL}' is a valid model ID on Hugging Face Hub.\n"
        f"2. If the model requires gating or specific permissions.\n"
        f"3. If your HF_TOKEN is correct and has the necessary permissions (set as a Secret in your Space).\n"
        f"4. If the default Inference API supports this model or if a dedicated Inference Endpoint is needed."
    )
    print(f"ERROR: {error_message}")
    raise gr.Error(error_message)
except Exception as e:
    error_message = (
        f"An unexpected error occurred while initializing the model client for {MODEL}.\n"
        f"Error Type: {type(e).__name__}\n"
        f"Error: {e}\n"
        f"Traceback:\n{traceback.format_exc()}\n"
        f"Check HF_TOKEN, model availability, network connection, and Space resources."
    )
    print(f"ERROR: {error_message}")
    raise gr.Error(error_message)
# --- Helper Functions ---
# Parse all ```filename.ext\n<code>``` blocks
def parse_code_blocks(response: str) -> list:
    pattern = r"```([^\n]+)\n(.*?)```"
    blocks = re.findall(pattern, response, re.DOTALL)
    files = []
    for filename, code in blocks:
        filename = filename.strip()
        code = code.strip()
        # Basic language detection (can be expanded)
        lang = None
        if filename.endswith(".py"):
            lang = "python"
        elif filename.endswith(".js"):
            lang = "javascript"
        elif filename.endswith(".html"):
            lang = "html"
        elif filename.endswith(".css"):
            lang = "css"
        elif filename.endswith(".json"):
            lang = "json"
        elif filename.endswith(".md"):
            lang = "markdown"
        elif filename.endswith(".sh") or filename.endswith(".bash"):
            lang = "bash"
        elif filename.endswith(".java"):
            lang = "java"
        # Add more extensions as needed
        files.append({
            "filename": filename,
            "language": lang,
            "code": code
        })
    # Debug logging to inspect what was parsed
    # print(f"Parsed {len(files)} code blocks.")
    # for i, f in enumerate(files):
    #     print(f"  Block {i}: filename='{f['filename']}', lang='{f['language']}', code_len={len(f['code'])}")
    return files
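# Illustrative sketch (commented out, not used by the app): given a hypothetical model
# reply, parse_code_blocks returns one dict per fenced block, e.g.
# _sample_reply = "```index.html\n<h1>Hello</h1>\n```\n```style.css\nbody { margin: 0; }\n```"
# parse_code_blocks(_sample_reply) would return:
#   [{"filename": "index.html", "language": "html", "code": "<h1>Hello</h1>"},
#    {"filename": "style.css", "language": "css", "code": "body { margin: 0; }"}]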
def strip_think_tags(text: str) -> str:
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()


def extract_thoughts(text: str) -> str:
    matches = re.findall(r"<think>(.*?)</think>", text, flags=re.DOTALL)
    # Join all thought blocks, separated by a divider, for cleaner display
    return "\n---\n".join(match.strip() for match in matches).strip()
# --- System Message ---
system_message = (
    "You are a helpful AI assistant specialized in generating website code. "
    "Generate all the necessary files based on the user's request. "
    "Output each file within a separate markdown code block formatted exactly like this:\n"
    "```filename.ext\n"
    "<code>\n"
    "```\n"
    "Do not add any explanatory text outside the code blocks. Ensure the filenames have appropriate extensions. "
    "If you need to think step-by-step, use <think>...</think> tags. These tags will be hidden from the final user output but help guide your generation process."
)
# --- Code Generation Function ---
def generate_code(prompt, backend_choice, max_tokens, temperature, top_p):
    if not prompt:
        # Handle the empty prompt case
        yield [], gr.update(value="Please enter a description for the website.", visible=True)
        return

    user_prompt = f"USER_PROMPT: {prompt}\nUSER_BACKEND_PREFERENCE: {backend_choice}"
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt}
    ]
    full_response = ""
    current_thoughts = ""
    accumulated_error = ""  # Accumulate errors raised during the stream

    # Reset outputs: yield an empty list to the gr.Column to clear previous code blocks,
    # and make the thinking box visible with a placeholder message.
    yield [], gr.update(visible=True, value="Generating code...")

    print(f"\n--- Generating Code ---")
    print(f"Prompt: {prompt[:100]}...")  # Log a truncated prompt
    print(f"Backend: {backend_choice}, Max Tokens: {max_tokens}, Temp: {temperature}, Top-P: {top_p}")
    try:
        stream = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature if temperature > 0 else 0.01,  # Ensure temperature is positive
            top_p=top_p,
            # Consider adding stop sequences if the model tends to run on
            # stop=["```\n\n", "\n\nHuman:", "\n\nUSER:"]  # Example stop sequences
        )

        code_updates = []  # Store the gr.Code components to yield
        for i, message in enumerate(stream):
            # Check for errors in the stream message (some providers include error info)
            if hasattr(message, 'error') and message.error:
                accumulated_error += f"Error in stream chunk {i}: {message.error}\n"
                print(f"ERROR in stream chunk {i}: {message.error}")
                continue  # Skip this chunk if it is an error indicator

            # Extract the token; the common path is message.choices[0].delta.content
            try:
                token = message.choices[0].delta.content
                # Handle a potential None token at the end of the stream or in error cases
                if token is None:
                    token = ""
                # print(f"Token {i}: '{token}'")  # DEBUG: print each token
            except (AttributeError, IndexError, TypeError) as e:
                # Handle unexpected message structure
                print(f"Warning: Could not extract token from stream message {i}. Structure: {message}. Error: {e}")
                token = ""  # Assign an empty string to avoid breaking accumulation

            if isinstance(token, str):
                full_response += token

                # Update the thinking box periodically (e.g., every 10 tokens or when thoughts change)
                if i % 10 == 0 or "<think>" in token or "</think>" in token:
                    thoughts = extract_thoughts(full_response)
                    if thoughts != current_thoughts:
                        current_thoughts = thoughts
                        # Don't yield code_updates here yet; only update the thoughts
                        yield code_updates, gr.update(value=current_thoughts if current_thoughts else "Thinking...", visible=True)

                # Update code blocks less frequently, or when a block looks complete.
                # Heuristic: update if the response ends with ```
                if token.strip().endswith("```") or i % 20 == 0:  # Adjust frequency as needed
                    cleaned_response = strip_think_tags(full_response)
                    parsed_files = parse_code_blocks(cleaned_response)

                    # Compare with the existing code_updates to avoid redundant updates
                    # when the content has not changed significantly.
                    changed = False
                    if len(parsed_files) != len(code_updates):
                        changed = True
                    else:
                        # Quick check whether filenames or code lengths differ
                        for idx, f in enumerate(parsed_files):
                            if (idx >= len(code_updates) or
                                    f["filename"] != code_updates[idx].label or
                                    len(f["code"]) != len(code_updates[idx].value)):  # Simple length check
                                changed = True
                                break

                    if changed or not code_updates:  # Update if changed or on the first pass
                        code_updates = []
                        for f in parsed_files:
                            code_updates.append(
                                gr.Code(
                                    value=f["code"],
                                    label=f["filename"],
                                    language=f["language"]
                                )
                            )
                        # Yield the list of gr.Code components to the gr.Column,
                        # together with the current thoughts (may be slightly out of sync, which is acceptable)
                        yield code_updates, gr.update(value=current_thoughts if current_thoughts else "Thinking...", visible=True)
        # --- Final update after the stream ends ---
        print("Stream finished.")
        if accumulated_error:
            print(f"Errors occurred during stream:\n{accumulated_error}")
            # Surface streaming errors to the user by appending them to the thoughts box
            current_thoughts += f"\n\n**Streaming Errors:**\n{accumulated_error}"

        cleaned_response = strip_think_tags(full_response)
        final_files = parse_code_blocks(cleaned_response)
        print(f"Final parsed files: {len(final_files)}")

        final_code_updates = []
        if not final_files and not accumulated_error:
            # Handle the case where no code blocks were generated
            final_code_updates.append(gr.Markdown("No code blocks were generated. The model might have responded with text instead, or the format was incorrect."))
            print("Warning: No code blocks found in the final response.")
            # Optionally show the raw response for debugging
            # final_code_updates.append(gr.Code(label="Raw Response", value=cleaned_response, language="text"))
        elif not final_files and accumulated_error:
            final_code_updates.append(gr.Markdown(f"**Error during generation:**\n{accumulated_error}"))
        else:
            for f in final_files:
                final_code_updates.append(
                    gr.Code(
                        value=f["code"],
                        label=f["filename"],
                        language=f["language"]
                    )
                )

        # Yield the final code blocks and show final thoughts/errors (hide the box if there are none)
        final_thought_update = gr.update(visible=bool(current_thoughts), value=current_thoughts)
        yield final_code_updates, final_thought_update
    except HfHubHTTPError as http_err:
        # Handle errors raised during the streaming call itself
        error_message = (
            f"**Error during code generation (HTTP Error):**\n"
            f"Status Code: {http_err.response.status_code}\n"
            f"Error: {http_err}\n"
            f"This could be due to rate limits, invalid input, model errors, or token issues.\n"
            f"Check the Hugging Face Space logs for more details."
        )
        print(f"ERROR: {error_message}")
        print(traceback.format_exc())
        # Yield the error message in the output area and hide the thinking box
        yield [gr.Markdown(error_message)], gr.update(visible=False)
    except Exception as e:
        error_message = (
            f"**An unexpected error occurred during code generation:**\n"
            f"Error Type: {type(e).__name__}\n"
            f"Error: {e}\n\n"
            f"**Traceback:**\n```\n{traceback.format_exc()}\n```\n"
            f"Check the Hugging Face Space logs for more details."
        )
        print(f"ERROR: {error_message}")
        # Yield the error message in the output area and hide the thinking box
        yield [gr.Markdown(error_message)], gr.update(visible=False)
# --- Gradio Interface ---
with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
gr.Markdown("# ✨ Website Code Generator ✨")
gr.Markdown("Describe the website you want. Code files will appear below. Uses `mistralai/Mixtral-8x7B-Instruct-v0.1` by default (check code to change).") # Update description
with gr.Row():
with gr.Column(scale=2):
prompt_input = gr.Textbox(label="Website Description", lines=6, placeholder="e.g., A simple landing page with a title, a paragraph, and a button linking to example.com")
backend_radio = gr.Radio(["Static (HTML/CSS/JS)", "Flask", "Node.js"], label="Backend Preference (Influences AI)", value="Static (HTML/CSS/JS)")
generate_button = gr.Button("✨ Generate Website Code", variant="primary")
with gr.Accordion("Advanced Settings", open=False):
max_tokens_slider = gr.Slider(512, 8192, value=4096, step=256, label="Max New Tokens") # Increased max potential tokens
temperature_slider = gr.Slider(0.0, 1.2, value=0.6, step=0.05, label="Temperature (0=deterministic, >1=more creative)") # Allow 0
top_p_slider = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (Nucleus Sampling)")
with gr.Column(scale=3):
thinking_box = gr.Textbox(label="Model Activity / Thoughts", visible=False, interactive=False, lines=2)
# Use gr.Column to hold the dynamic code blocks
# Remove the update lambda, it's not needed for Column
file_outputs = gr.Column(elem_id="code-output-area")
generate_button.click(
fn=generate_code,
inputs=[prompt_input, backend_radio, max_tokens_slider, temperature_slider, top_p_slider],
# Output to the Column and the Textbox
outputs=[file_outputs, thinking_box],
# api_name="generate_code" # Optional: for API access
)
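    # Note: each yield from generate_code maps positionally to the outputs list above,
    # i.e. the first yielded value (a list of components) goes to file_outputs and the
    # second (a gr.update) goes to thinking_box.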
# --- Launch ---
if __name__ == "__main__":
print("Starting Gradio App...")
# Use queue() for handling multiple users and streaming
# Set share=False unless you specifically want a public link from local execution
# Set debug=True for more detailed Gradio errors locally (remove/set False for production)
demo.queue().launch(debug=False, share=False)
print("Gradio App Launched.") |