File size: 13,154 Bytes
dfc5e93
 
5f4f3c1
58a5e73
cd13883
 
 
9376840
cd13883
 
 
58a5e73
 
cd13883
58a5e73
cd13883
 
9376840
cd13883
9376840
83207ef
cd13883
 
 
 
 
 
58a5e73
cd13883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58a5e73
cd13883
 
 
58a5e73
 
 
cd13883
 
 
 
 
 
 
58a5e73
 
 
 
 
9376840
58a5e73
9376840
cd13883
9376840
 
58a5e73
9376840
58a5e73
 
 
 
 
9376840
83207ef
cd13883
83207ef
cd13883
 
83207ef
853d569
cd13883
58a5e73
5f4f3c1
853d569
58a5e73
853d569
9376840
 
 
58a5e73
cd13883
9376840
 
 
dfc5e93
b752712
58a5e73
b752712
 
dfc5e93
58a5e73
 
 
 
9376840
58a5e73
9376840
 
58a5e73
 
 
cd13883
9376840
58a5e73
 
9376840
58a5e73
cd13883
58a5e73
cd13883
9376840
58a5e73
 
 
 
9376840
 
58a5e73
9376840
 
 
58a5e73
9376840
cd13883
 
9376840
58a5e73
9376840
58a5e73
9376840
58a5e73
9376840
 
58a5e73
 
 
 
9376840
58a5e73
9376840
cd13883
58a5e73
9376840
58a5e73
 
 
 
 
 
 
9376840
58a5e73
 
 
9376840
 
58a5e73
 
 
 
 
 
 
9376840
58a5e73
 
 
 
9376840
 
58a5e73
 
 
 
 
 
 
9376840
 
 
 
 
58a5e73
9376840
 
58a5e73
 
 
9376840
 
 
58a5e73
9376840
58a5e73
 
 
9376840
 
58a5e73
9376840
 
58a5e73
cd13883
 
 
9376840
58a5e73
 
f4eb547
0950920
9376840
58a5e73
9376840
 
 
 
 
 
 
 
 
 
58a5e73
9376840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58a5e73
 
9376840
 
 
 
 
 
 
 
 
58a5e73
9376840
58a5e73
 
 
 
83207ef
58a5e73
83207ef
9376840
58a5e73
9376840
83207ef
 
dfc5e93
9376840
58a5e73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import gradio as gr
from huggingface_hub import InferenceClient
import re
import time # For potential brief pauses if needed

# --- Hugging Face Token (Optional but Recommended) ---
# from huggingface_hub import login
# login("YOUR_HUGGINGFACE_TOKEN") # Replace with your token if needed

# --- Inference Client ---
try:
    # You might need to specify the model URL directly if the alias isn't working
    # client = InferenceClient(model="https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta")
    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
    client.timeout = 120 # Increase timeout for potentially long generations
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    client = None # Set client to None if initialization fails

# --- Parsing Function ---
def parse_files(raw_response):
    """
    Parses filenames and code blocks from the raw AI output.
    """
    if not raw_response:
        return []

    # Pattern: Look for a filename line followed by content until the next filename line or end of string.
    pattern = re.compile(
        r"^\s*([\w\-.\/\\]+\.\w+)\s*\n"  # Filename line (must have an extension)
        r"(.*?)"                        # Capture content (non-greedy)
        r"(?=\n\s*[\w\-.\/\\]+\.\w+\s*\n|\Z)", # Lookahead for next filename or end of string
        re.DOTALL | re.MULTILINE
    )
    files = pattern.findall(raw_response)

    cleaned_files = []
    for name, content in files:
        # Remove common code block markers (``` optionally followed by lang)
        content_cleaned = re.sub(r"^\s*```[a-zA-Z]*\n?", "", content, flags=re.MULTILINE)
        content_cleaned = re.sub(r"\n?```\s*$", "", content_cleaned, flags=re.MULTILINE)
        cleaned_files.append((name.strip(), content_cleaned.strip()))

    # Fallback if no files parsed but content exists
    if not cleaned_files and raw_response.strip():
        if any(c in raw_response for c in ['<','>','{','}',';','(',')']):
            print("Warning: No filenames found, defaulting to index.html")
            lang = "html"
            if "{" in raw_response and "}" in raw_response and ":" in raw_response: lang = "css"
            elif "function" in raw_response or "const" in raw_response or "let" in raw_response: lang = "javascript"
            default_filename = "index.html"
            if lang == "css": default_filename = "style.css"
            elif lang == "javascript": default_filename = "script.js"
            cleaned_files.append((default_filename, raw_response.strip()))

    return cleaned_files

# --- Streaming and Parsing Orchestrator ---
def stream_and_parse_code(prompt, backend, system_message, max_tokens, temperature, top_p):
    """
    Streams raw output to one component and generates final tabs for another.
    This function acts as the main callback for the button click.
    Yields dictionary updates for Gradio components.
    """
    # Check if client initialized correctly
    if not client:
        error_msg = "Error: Inference Client not available. Check API token or model name."
        # Yield updates to both components indicating the error
        yield {
            live_output: gr.update(value=error_msg),
            final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Error", children=[gr.Textbox(value=error_msg)])])
        }
        return # Stop execution

    # --- Prepare for Streaming ---
    # Construct the system prompt dynamically
    full_sys_msg = f"""
You are a code generation AI. Given a prompt, generate the necessary files for a website using the {backend} backend.
Always include an index.html file.
Respond ONLY with filenames and the raw code for each file.
Each file must start with its filename on a new line. Example:

index.html
<!DOCTYPE html>
<html></html>

style.css
body {{}}

script.js
console.log("Hello");

Ensure the code is complete. NO commentary, NO explanations, NO markdown formatting like backticks (```).
Start generating the files now.
""".strip()
    if system_message: # Append user's system message if provided
        full_sys_msg += "\n\n" + system_message

    messages = [
        {"role": "system", "content": full_sys_msg},
        {"role": "user", "content": prompt}
    ]

    full_raw_response = ""
    error_occurred = False
    error_message = ""

    # Initial state update: Clear previous output and show generating status
    yield {
        live_output: gr.update(value="Generating stream..."),
        final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Generating...")]) # Indicate loading in tabs
    }

    # --- Streaming Loop ---
    try:
        # Start the streaming call
        stream = client.chat_completion(
            messages,
            max_tokens=int(max_tokens), # Ensure max_tokens is an integer
            stream=True,
            temperature=temperature,
            top_p=top_p
        )
        # Process each chunk received from the stream
        for chunk in stream:
            token = chunk.choices[0].delta.content
            if token:
                full_raw_response += token
                # Yield updates for the live raw output component only
                # Keep tabs in a 'streaming' state during the stream
                yield {
                    live_output: gr.update(value=full_raw_response),
                    # No update needed for final_tabs here, or keep showing streaming state
                    # final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Streaming...")]) # Optional: update tabs state
                }
                # time.sleep(0.01) # Optional: small delay if updates are too fast and causing UI lag

    except Exception as e:
        # Handle errors during the API call or streaming process
        print(f"Error during AI streaming: {e}")
        error_message = f"Error during AI generation: {e}\n\nPartial Response (if any):\n{full_raw_response}"
        error_occurred = True
        # Update live output with error, prepare error tab
        yield {
            live_output: gr.update(value=error_message),
            final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Error")]) # Indicate error state in tabs
        }

    # --- Post-Streaming: Parsing and Final Tab Generation ---
    if error_occurred:
        # If an error happened during stream, create a final error tab
         final_tabs_update = gr.Tabs(tabs=[
            gr.TabItem(label="Error", children=[gr.Textbox(value=error_message, label="Generation Error", lines=10)])
        ])
    else:
        # If streaming succeeded, parse the complete raw response
        print("\n--- Final Raw AI Response ---")
        print(full_raw_response)
        print("--------------------------\n")
        files = parse_files(full_raw_response)

        if not files:
            # Handle case where parsing failed or AI gave empty/invalid response
            no_files_msg = "AI finished, but did not return recognizable file content or the response was empty. See raw output above."
            final_tabs_update = gr.Tabs(tabs=[
                gr.TabItem(label="Output", children=[gr.Textbox(value=no_files_msg, label="Result")])
            ])
            # Update live output as well to make the message clear
            yield { live_output: gr.update(value=full_raw_response + "\n\n" + no_files_msg), final_tabs: final_tabs_update }
            return # Exit if no files

        # --- Create Tabs (if files were parsed successfully) ---
        tabs_content = []
        for name, content in files:
            name = name.strip()
            content = content.strip()
            # Skip if filename or content is empty after stripping
            if not name or not content:
                print(f"Skipping file with empty name or content: Name='{name}'")
                continue

            # Determine language for syntax highlighting
            lang = "plaintext" # Default
            if name.endswith((".html", ".htm")): lang = "html"
            elif name.endswith(".css"): lang = "css"
            elif name.endswith(".js"): lang = "javascript"
            elif name.endswith(".py"): lang = "python"
            elif name.endswith(".json"): lang = "json"
            elif name.endswith(".md"): lang = "markdown"
            elif name.endswith((".sh", ".bash")): lang = "bash"
            elif name.endswith((".xml", ".xaml", ".svg")): lang = "xml"
            elif name.endswith(".yaml") or name.endswith(".yml"): lang = "yaml"

            # Ensure elem_id is unique and valid (replace problematic characters)
            elem_id = f"tab_{re.sub(r'[^a-zA-Z0-9_-]', '_', name)}"

            tab_item = gr.TabItem(label=name, elem_id=elem_id, children=[
                gr.Code(value=content, language=lang, label=name, interactive=False) # Show code in Code block
            ])
            tabs_content.append(tab_item)

        # Handle case where parsing found files, but they were all filtered out (empty name/content)
        if not tabs_content:
             final_tabs_update = gr.Tabs(tabs=[gr.TabItem(label="Output", children=[gr.Textbox(value="No valid files generated after filtering.", label="Result")])])
        else:
            final_tabs_update = gr.Tabs(tabs=tabs_content) # Create the final Tabs component with content

    # --- Final Update ---
    # Yield the final state for both components
    # Use gr.update for live_output if you only want to set its value without recreating it
    # Directly return the new final_tabs component
    yield {
        live_output: gr.update(value=full_raw_response if not error_occurred else error_message), # Show final raw response or error
        final_tabs: final_tabs_update # Update the final_tabs component completely
    }


# --- Gradio UI Definition ---
with gr.Blocks(css=".gradio-container { max-width: 95% !important; }") as demo: # Use more screen width
    gr.Markdown("## WebGen AI β€” One Prompt β†’ Full Website Generator")
    gr.Markdown("Generates website code based on your description. Raw output streams live, final files appear in tabs below.")

    with gr.Row():
        # Column for inputs and controls
        with gr.Column(scale=2):
            prompt = gr.Textbox(
                label="Describe your website",
                placeholder="E.g., a simple landing page for a coffee shop with sections for menu, about, and contact.",
                lines=3 # Allow more lines for the prompt
            )
            backend = gr.Dropdown(
                ["Static", "Flask", "Node.js"],
                value="Static",
                label="Backend Technology"
            )
            with gr.Accordion("Advanced Options", open=False):
                system_message = gr.Textbox(
                    label="Extra instructions for the AI (System Message)",
                    placeholder="Optional: e.g., 'Use Tailwind CSS for styling', 'Make it responsive'",
                    value="",
                    lines=2
                 )
                max_tokens = gr.Slider(
                    minimum=256,
                    maximum=4096, # Increased max tokens for complex sites
                    value=2048, # Increased default
                    step=64,
                    label="Max Tokens (Output Length)"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.5, # Allow slightly higher temperature
                    value=0.7,
                    step=0.1,
                    label="Temperature (Creativity)"
                )
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (Sampling Focus)"
                )
            generate_button = gr.Button("✨ Generate Code ✨", variant="primary") # Make button primary

        # Column for live output
        with gr.Column(scale=3):
            gr.Markdown("#### Live Raw Output Stream")
            # Component to show the live, unparsed stream - CORRECTED LANGUAGE
            live_output = gr.Code(
                label="Raw AI Stream",
                language="plaintext", # Use "plaintext" for generic text
                lines=20, # Increased lines for visibility
                interactive=False # Output only
            )

    gr.Markdown("---") # Separator
    gr.Markdown("#### Final Generated Files (Tabs)")
    # Placeholder for the final structured tabs - will be replaced by the output yield
    final_tabs = gr.Tabs(elem_id="output_tabs")


    # Button click action - uses the orchestrator function
    generate_button.click(
        stream_and_parse_code, # Call the main function that handles streaming and parsing
        inputs=[prompt, backend, system_message, max_tokens, temperature, top_p],
        # Outputs dictionary maps function yields to components by variable name
        outputs=[live_output, final_tabs],
        show_progress="hidden" # Hide default Gradio progress bar as we show live stream
    )

if __name__ == "__main__":
    # Launch the Gradio app with debug=True for development
    demo.launch(debug=True)